Commit d6cf6b38 by Eric Holk

ARM32 Vector lowering - scalarize select

With this change, we pass the select crosstest. Since this would have introduced a three-argument version of scalarizeInstruction, I decided to generalize it using templates.

BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1683243003 .
parent b35615b4
...@@ -448,7 +448,8 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime ...@@ -448,7 +448,8 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
-i x8664,sandbox,sse4.1,Om1 \ -i x8664,sandbox,sse4.1,Om1 \
-i arm32,neon \ -i arm32,neon \
-e arm32,neon,test_vector_ops \ -e arm32,neon,test_vector_ops \
-e arm32,neon,test_select -e arm32,nonsfi \
-e arm32,neon,test_vector_ops
PNACL_BIN_PATH=$(PNACL_BIN_PATH) \ PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
$(LLVM_SRC_PATH)/utils/lit/lit.py -sv $(CHECK_XTEST_TESTS) $(LLVM_SRC_PATH)/utils/lit/lit.py -sv $(CHECK_XTEST_TESTS)
endif endif
......
...@@ -313,6 +313,7 @@ TargetLowering::AutoBundle::~AutoBundle() { ...@@ -313,6 +313,7 @@ TargetLowering::AutoBundle::~AutoBundle() {
} }
void TargetLowering::genTargetHelperCalls() { void TargetLowering::genTargetHelperCalls() {
Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
for (CfgNode *Node : Func->getNodes()) { for (CfgNode *Node : Func->getNodes()) {
Context.init(Node); Context.init(Node);
while (!Context.atEnd()) { while (!Context.atEnd()) {
...@@ -711,10 +712,9 @@ void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind, ...@@ -711,10 +712,9 @@ void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
Variable *Dest, Operand *Src0, Variable *Dest, Operand *Src0,
Operand *Src1) { Operand *Src1) {
scalarizeInstruction( scalarizeInstruction(
Dest, Src0, Src1, Dest, [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
[this, Kind](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1); return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
}); }, Src0, Src1);
} }
void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C, void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
......
...@@ -325,6 +325,10 @@ private: ...@@ -325,6 +325,10 @@ private:
// locking/unlocking) to prevent nested bundles. // locking/unlocking) to prevent nested bundles.
bool AutoBundling = false; bool AutoBundling = false;
/// This indicates whether we are in the genTargetHelperCalls phase, and
/// therefore can do things like scalarization.
bool GeneratingTargetHelpers = false;
// _bundle_lock(), and _bundle_unlock(), were made private to force subtargets // _bundle_lock(), and _bundle_unlock(), were made private to force subtargets
// to use the AutoBundle helper. // to use the AutoBundle helper.
void void
...@@ -469,39 +473,42 @@ protected: ...@@ -469,39 +473,42 @@ protected:
/// Generalizes scalarizeArithmetic to support other instruction types. /// Generalizes scalarizeArithmetic to support other instruction types.
/// ///
/// MakeInstruction is a function-like object with signature /// insertScalarInstruction is a function-like object with signature
/// (Variable *Dest, Variable *Src0, Variable *Src1) -> Instr *. /// (Variable *Dest, Variable *Src0, Variable *Src1) -> Instr *.
template <typename F> template <typename... Operands,
void scalarizeInstruction(Variable *Dest, Operand *Src0, Operand *Src1, typename F = std::function<Inst *(Variable *, Operands *...)>>
F &&MakeInstruction) { void scalarizeInstruction(Variable *Dest, F insertScalarInstruction,
Operands *... Srcs) {
assert(GeneratingTargetHelpers &&
"scalarizeInstruction called during incorrect phase");
const Type DestTy = Dest->getType(); const Type DestTy = Dest->getType();
assert(isVectorType(DestTy)); assert(isVectorType(DestTy));
const Type DestElementTy = typeElementType(DestTy); const Type DestElementTy = typeElementType(DestTy);
const SizeT NumElements = typeNumElements(DestTy); const SizeT NumElements = typeNumElements(DestTy);
const Type Src0ElementTy = typeElementType(Src0->getType());
const Type Src1ElementTy = typeElementType(Src1->getType());
assert(NumElements == typeNumElements(Src0->getType()));
assert(NumElements == typeNumElements(Src1->getType()));
Variable *T = Func->makeVariable(DestTy); Variable *T = Func->makeVariable(DestTy);
Context.insert<InstFakeDef>(T); Context.insert<InstFakeDef>(T);
for (SizeT I = 0; I < NumElements; ++I) { for (SizeT I = 0; I < NumElements; ++I) {
Constant *Index = Ctx->getConstantInt32(I); auto *Index = Ctx->getConstantInt32(I);
// Extract the next two inputs. auto makeExtractThunk = [this, Index, NumElements](Operand *Src) {
Variable *Op0 = Func->makeVariable(Src0ElementTy); return [this, Index, NumElements, Src]() {
Context.insert<InstExtractElement>(Op0, Src0, Index); assert(typeNumElements(Src->getType()) == NumElements);
Variable *Op1 = Func->makeVariable(Src1ElementTy);
Context.insert<InstExtractElement>(Op1, Src1, Index); const auto ElementTy = typeElementType(Src->getType());
auto *Op = Func->makeVariable(ElementTy);
Context.insert<InstExtractElement>(Op, Src, Index);
return Op;
};
};
// Perform the operation as a scalar operation. // Perform the operation as a scalar operation.
Variable *Res = Func->makeVariable(DestElementTy); auto *Res = Func->makeVariable(DestElementTy);
auto Arith = MakeInstruction(Res, Op0, Op1); auto *Arith = applyToThunkedArgs(insertScalarInstruction, Res,
// We might have created an operation that needed a helper call. makeExtractThunk(Srcs)...);
genTargetHelperCallFor(Arith); genTargetHelperCallFor(Arith);
// Insert the result into position.
Variable *DestT = Func->makeVariable(DestTy); Variable *DestT = Func->makeVariable(DestTy);
Context.insert<InstInsertElement>(DestT, T, Res, Index); Context.insert<InstInsertElement>(DestT, T, Res, Index);
T = DestT; T = DestT;
...@@ -509,38 +516,38 @@ protected: ...@@ -509,38 +516,38 @@ protected:
Context.insert<InstAssign>(Dest, T); Context.insert<InstAssign>(Dest, T);
} }
template <typename F> // applyToThunkedArgs is used by scalarizeInstruction. Ideally, we would just
void scalarizeUnaryInstruction(Variable *Dest, Operand *Src0, // call insertScalarInstruction(Res, Srcs...), but C++ does not specify
F &&MakeInstruction) { // evaluation order which means this leads to an unpredictable final
const Type DestTy = Dest->getType(); // output. Instead, we wrap each of the Srcs in a thunk and these
assert(isVectorType(DestTy)); // applyToThunkedArgs functions apply the thunks in a well defined order so we
const Type DestElementTy = typeElementType(DestTy); // still get well-defined output.
const SizeT NumElements = typeNumElements(DestTy); Inst *applyToThunkedArgs(
const Type Src0ElementTy = typeElementType(Src0->getType()); std::function<Inst *(Variable *, Variable *)> insertScalarInstruction,
Variable *Res, std::function<Variable *()> thunk0) {
assert(NumElements == typeNumElements(Src0->getType())); auto *Src0 = thunk0();
return insertScalarInstruction(Res, Src0);
Variable *T = Func->makeVariable(DestTy); }
Context.insert<InstFakeDef>(T);
for (SizeT I = 0; I < NumElements; ++I) {
Constant *Index = Ctx->getConstantInt32(I);
// Extract the next two inputs.
Variable *Op0 = Func->makeVariable(Src0ElementTy);
Context.insert<InstExtractElement>(Op0, Src0, Index);
// Perform the operation as a scalar operation. Inst *
Variable *Res = Func->makeVariable(DestElementTy); applyToThunkedArgs(std::function<Inst *(Variable *, Variable *, Variable *)>
auto Arith = MakeInstruction(Res, Op0); insertScalarInstruction,
// We might have created an operation that needed a helper call. Variable *Res, std::function<Variable *()> thunk0,
genTargetHelperCallFor(Arith); std::function<Variable *()> thunk1) {
auto *Src0 = thunk0();
auto *Src1 = thunk1();
return insertScalarInstruction(Res, Src0, Src1);
}
// Insert the result into position. Inst *applyToThunkedArgs(
Variable *DestT = Func->makeVariable(DestTy); std::function<Inst *(Variable *, Variable *, Variable *, Variable *)>
Context.insert<InstInsertElement>(DestT, T, Res, Index); insertScalarInstruction,
T = DestT; Variable *Res, std::function<Variable *()> thunk0,
} std::function<Variable *()> thunk1, std::function<Variable *()> thunk2) {
Context.insert<InstAssign>(Dest, T); auto *Src0 = thunk0();
auto *Src1 = thunk1();
auto *Src2 = thunk2();
return insertScalarInstruction(Res, Src0, Src1, Src2);
} }
/// SandboxType enumerates all possible sandboxing strategies that /// SandboxType enumerates all possible sandboxing strategies that
......
...@@ -592,10 +592,10 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -592,10 +592,10 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
const InstCast::OpKind CastKind = CastInstr->getCastKind(); const InstCast::OpKind CastKind = CastInstr->getCastKind();
if (isVectorType(DestTy)) { if (isVectorType(DestTy)) {
scalarizeUnaryInstruction( scalarizeInstruction(
Dest, Src0, [this, CastKind](Variable *Dest, Variable *Src) { Dest, [this, CastKind](Variable *Dest, Variable *Src) {
return Context.insert<InstCast>(CastKind, Dest, Src); return Context.insert<InstCast>(CastKind, Dest, Src);
}); }, Src0);
CastInstr->setDeleted(); CastInstr->setDeleted();
return; return;
} }
...@@ -753,10 +753,11 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -753,10 +753,11 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
auto *CmpInstr = llvm::cast<InstIcmp>(Instr); auto *CmpInstr = llvm::cast<InstIcmp>(Instr);
const auto Condition = CmpInstr->getCondition(); const auto Condition = CmpInstr->getCondition();
scalarizeInstruction( scalarizeInstruction(
Dest, CmpInstr->getSrc(0), CmpInstr->getSrc(1), Dest,
[this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) { [this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstIcmp>(Condition, Dest, Src0, Src1); return Context.insert<InstIcmp>(Condition, Dest, Src0, Src1);
}); },
CmpInstr->getSrc(0), CmpInstr->getSrc(1));
CmpInstr->setDeleted(); CmpInstr->setDeleted();
} }
return; return;
...@@ -768,14 +769,33 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -768,14 +769,33 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
auto *CmpInstr = llvm::cast<InstFcmp>(Instr); auto *CmpInstr = llvm::cast<InstFcmp>(Instr);
const auto Condition = CmpInstr->getCondition(); const auto Condition = CmpInstr->getCondition();
scalarizeInstruction( scalarizeInstruction(
Dest, CmpInstr->getSrc(0), CmpInstr->getSrc(1), Dest,
[this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) { [this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstFcmp>(Condition, Dest, Src0, Src1); return Context.insert<InstFcmp>(Condition, Dest, Src0, Src1);
}); },
CmpInstr->getSrc(0), CmpInstr->getSrc(1));
CmpInstr->setDeleted(); CmpInstr->setDeleted();
} }
return; return;
} }
case Inst::Select: {
Variable *Dest = Instr->getDest();
const auto DestTy = Dest->getType();
if (isVectorType(DestTy)) {
auto *SelectInstr = llvm::cast<InstSelect>(Instr);
scalarizeInstruction(Dest,
[this](Variable *Dest, Variable *Src0,
Variable *Src1, Variable *Src2) {
return Context.insert<InstSelect>(Dest, Src0, Src1,
Src2);
},
llvm::cast<Variable>(SelectInstr->getSrc(0)),
llvm::cast<Variable>(SelectInstr->getSrc(1)),
llvm::cast<Variable>(SelectInstr->getSrc(2)));
SelectInstr->setDeleted();
}
return;
}
} }
} }
......
...@@ -87,21 +87,7 @@ template <> struct PoolTypeConverter<uint8_t> { ...@@ -87,21 +87,7 @@ template <> struct PoolTypeConverter<uint8_t> {
namespace X86NAMESPACE { namespace X86NAMESPACE {
/// A helper class to ease the settings of RandomizationPoolingPause to disable using Utils::BoolFlagSaver;
/// constant blinding or pooling for some translation phases.
class BoolFlagSaver {
BoolFlagSaver() = delete;
BoolFlagSaver(const BoolFlagSaver &) = delete;
BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
public:
BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
~BoolFlagSaver() { Flag = OldValue; }
private:
const bool OldValue;
bool &Flag;
};
template <typename Traits> class BoolFoldingEntry { template <typename Traits> class BoolFoldingEntry {
BoolFoldingEntry(const BoolFoldingEntry &) = delete; BoolFoldingEntry(const BoolFoldingEntry &) = delete;
......
...@@ -123,6 +123,25 @@ template <typename T> static bool isPositiveZero(T Val) { ...@@ -123,6 +123,25 @@ template <typename T> static bool isPositiveZero(T Val) {
return Val == 0 && !std::signbit(Val); return Val == 0 && !std::signbit(Val);
} }
/// An RAII class to ensure that a boolean flag is restored to its previous
/// value upon function exit.
///
/// Used in places like RandomizationPoolingPause and generating target helper
/// calls.
class BoolFlagSaver {
BoolFlagSaver() = delete;
BoolFlagSaver(const BoolFlagSaver &) = delete;
BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
public:
BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
~BoolFlagSaver() { Flag = OldValue; }
private:
const bool OldValue;
bool &Flag;
};
} // end of namespace Utils } // end of namespace Utils
} // end of namespace Ice } // end of namespace Ice
......
; Test that we handle select on vectors.
; TODO(eholk): This test will need to be updated once comparison is no longer
; scalarized.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Scalarized lowering of a <4 x float> select. For each lane, the lowering
; extracts the i1 condition lane into a core register (vmov.s8), tests its
; low bit (tst #1), and uses a predicated vmovne.f32 to choose between the
; corresponding lanes of %a and %b before inserting into the result vector.
define internal <4 x float> @select4float(<4 x i1> %s, <4 x float> %a,
<4 x float> %b) {
; ASM-LABEL:select4float:
; DIS-LABEL:00000000 <select4float>:
entry:
%res = select <4 x i1> %s, <4 x float> %a, <4 x float> %b
; Lane 0: condition from d0[0], operands s4/s8.
; ASM: # q3 = def.pseudo
; ASM-NEXT: vmov.s8 r0, d0[0]
; ASM-NEXT: vmov.f32 s16, s4
; ASM-NEXT: vmov.f32 s17, s8
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s12, s17
; Lane 1.
; ASM-NEXT: vmov.s8 r0, d0[1]
; ASM-NEXT: vmov.f32 s16, s5
; ASM-NEXT: vmov.f32 s17, s9
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s13, s17
; Lane 2.
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.f32 s16, s6
; ASM-NEXT: vmov.f32 s17, s10
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s14, s17
; Lane 3 reuses s4/s8 as scratch instead of the callee-saved s16/s17.
; ASM-NEXT: vmov.s8 r0, d1[1]
; ASM-NEXT: vmov.f32 s4, s7
; ASM-NEXT: vmov.f32 s8, s11
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s8, s4
; ASM-NEXT: vmov.f32 s15, s8
; ASM-NEXT: vmov.f32 q0, q3
; ASM-NEXT: vpop {s16, s17}
; ASM-NEXT: # s16 = def.pseudo
; ASM-NEXT: # s17 = def.pseudo
; ASM-NEXT: bx lr
ret <4 x float> %res
}
; Scalarized lowering of a <4 x i32> select. Integer lanes go through core
; registers, so the predicated pick is a plain movne (not vmovne.f32):
; extract condition lane (vmov.s8), extract both operand lanes (vmov.32),
; tst #1, movne, then insert the winner back into q3.
define internal <4 x i32> @select4i32(<4 x i1> %s, <4 x i32> %a, <4 x i32> %b) {
; ASM-LABEL:select4i32:
; DIS-LABEL:00000000 <select4i32>:
entry:
%res = select <4 x i1> %s, <4 x i32> %a, <4 x i32> %b
; Lanes d0[0], d0[1], d1[0], d1[1] in order; operands come from d2/d3 (%a)
; and d4/d5 (%b), result accumulates in d6/d7 (= q3).
; ASM: # q3 = def.pseudo
; ASM-NEXT: vmov.s8 r0, d0[0]
; ASM-NEXT: vmov.32 r1, d2[0]
; ASM-NEXT: vmov.32 r2, d4[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d6[0], r2
; ASM-NEXT: vmov.s8 r0, d0[1]
; ASM-NEXT: vmov.32 r1, d2[1]
; ASM-NEXT: vmov.32 r2, d4[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.32 r1, d3[0]
; ASM-NEXT: vmov.32 r2, d5[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[1]
; ASM-NEXT: vmov.32 r1, d3[1]
; ASM-NEXT: vmov.32 r2, d5[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d7[1], r2
; ASM-NEXT: vmov.i32 q0, q3
; ASM-NEXT: bx lr
ret <4 x i32> %res
}
; Scalarized lowering of an <8 x i16> select: same extract/tst/movne/insert
; pattern as the i32 case, repeated for 8 lanes using vmov.s16 extracts and
; vmov.16 inserts (lanes d0[0..3] then d1[0..3]).
define internal <8 x i16> @select8i16(<8 x i1> %s, <8 x i16> %a, <8 x i16> %b) {
; ASM-LABEL:select8i16:
; DIS-LABEL:00000000 <select8i16>:
entry:
%res = select <8 x i1> %s, <8 x i16> %a, <8 x i16> %b
; ASM: # q3 = def.pseudo
; ASM-NEXT: vmov.s8 r0, d0[0]
; ASM-NEXT: vmov.s16 r1, d2[0]
; ASM-NEXT: vmov.s16 r2, d4[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[0], r2
; ASM-NEXT: vmov.s8 r0, d0[1]
; ASM-NEXT: vmov.s16 r1, d2[1]
; ASM-NEXT: vmov.s16 r2, d4[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d0[2]
; ASM-NEXT: vmov.s16 r1, d2[2]
; ASM-NEXT: vmov.s16 r2, d4[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[2], r2
; ASM-NEXT: vmov.s8 r0, d0[3]
; ASM-NEXT: vmov.s16 r1, d2[3]
; ASM-NEXT: vmov.s16 r2, d4[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[3], r2
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.s16 r1, d3[0]
; ASM-NEXT: vmov.s16 r2, d5[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[1]
; ASM-NEXT: vmov.s16 r1, d3[1]
; ASM-NEXT: vmov.s16 r2, d5[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[1], r2
; ASM-NEXT: vmov.s8 r0, d1[2]
; ASM-NEXT: vmov.s16 r1, d3[2]
; ASM-NEXT: vmov.s16 r2, d5[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[2], r2
; ASM-NEXT: vmov.s8 r0, d1[3]
; ASM-NEXT: vmov.s16 r1, d3[3]
; ASM-NEXT: vmov.s16 r2, d5[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[3], r2
; ASM-NEXT: vmov.i16 q0, q3
; ASM-NEXT: bx lr
ret <8 x i16> %res
}
; Scalarized lowering of a <16 x i8> select: the extract/tst/movne/insert
; pattern repeated 16 times, all extracts via vmov.s8 and inserts via vmov.8
; (lanes d0[0..7] then d1[0..7]).
define internal <16 x i8> @select16i8(<16 x i1> %s, <16 x i8> %a,
<16 x i8> %b) {
; ASM-LABEL:select16i8:
; DIS-LABEL:00000000 <select16i8>:
entry:
%res = select <16 x i1> %s, <16 x i8> %a, <16 x i8> %b
; ASM: # q3 = def.pseudo
; ASM-NEXT: vmov.s8 r0, d0[0]
; ASM-NEXT: vmov.s8 r1, d2[0]
; ASM-NEXT: vmov.s8 r2, d4[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[0], r2
; ASM-NEXT: vmov.s8 r0, d0[1]
; ASM-NEXT: vmov.s8 r1, d2[1]
; ASM-NEXT: vmov.s8 r2, d4[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d0[2]
; ASM-NEXT: vmov.s8 r1, d2[2]
; ASM-NEXT: vmov.s8 r2, d4[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[2], r2
; ASM-NEXT: vmov.s8 r0, d0[3]
; ASM-NEXT: vmov.s8 r1, d2[3]
; ASM-NEXT: vmov.s8 r2, d4[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[3], r2
; ASM-NEXT: vmov.s8 r0, d0[4]
; ASM-NEXT: vmov.s8 r1, d2[4]
; ASM-NEXT: vmov.s8 r2, d4[4]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[4], r2
; ASM-NEXT: vmov.s8 r0, d0[5]
; ASM-NEXT: vmov.s8 r1, d2[5]
; ASM-NEXT: vmov.s8 r2, d4[5]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[5], r2
; ASM-NEXT: vmov.s8 r0, d0[6]
; ASM-NEXT: vmov.s8 r1, d2[6]
; ASM-NEXT: vmov.s8 r2, d4[6]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[6], r2
; ASM-NEXT: vmov.s8 r0, d0[7]
; ASM-NEXT: vmov.s8 r1, d2[7]
; ASM-NEXT: vmov.s8 r2, d4[7]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[7], r2
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.s8 r1, d3[0]
; ASM-NEXT: vmov.s8 r2, d5[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[1]
; ASM-NEXT: vmov.s8 r1, d3[1]
; ASM-NEXT: vmov.s8 r2, d5[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[1], r2
; ASM-NEXT: vmov.s8 r0, d1[2]
; ASM-NEXT: vmov.s8 r1, d3[2]
; ASM-NEXT: vmov.s8 r2, d5[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[2], r2
; ASM-NEXT: vmov.s8 r0, d1[3]
; ASM-NEXT: vmov.s8 r1, d3[3]
; ASM-NEXT: vmov.s8 r2, d5[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[3], r2
; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.s8 r1, d3[4]
; ASM-NEXT: vmov.s8 r2, d5[4]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[4], r2
; ASM-NEXT: vmov.s8 r0, d1[5]
; ASM-NEXT: vmov.s8 r1, d3[5]
; ASM-NEXT: vmov.s8 r2, d5[5]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[5], r2
; ASM-NEXT: vmov.s8 r0, d1[6]
; ASM-NEXT: vmov.s8 r1, d3[6]
; ASM-NEXT: vmov.s8 r2, d5[6]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[6], r2
; ASM-NEXT: vmov.s8 r0, d1[7]
; ASM-NEXT: vmov.s8 r1, d3[7]
; ASM-NEXT: vmov.s8 r2, d5[7]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[7], r2
; ASM-NEXT: vmov.i8 q0, q3
; ASM-NEXT: bx lr
ret <16 x i8> %res
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment