Commit 675e15b3 by Nicolas Capens Committed by Nicolas Capens

Implement vector intrinsics for ARM32.

MultiplyAddPairs is implemented using VMULL+VPADD. MultiplyHighSigned/Unsigned is implemented using VMULL+VSHRN. SubVectorLoad/Store is implemented using VLDR/VLD1/VSTR/VST1. VectorPackSigned/Unsigned is implemented using two VQMOVN.

Bug b/37496078
Bug b/37496856
Bug b/37496321
Bug b/37496082

Change-Id: I141fd901d53da24ce780f503dc7ad17b94fc6ba8
Reviewed-on: https://chromium-review.googlesource.com/693049
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Reviewed-on: https://swiftshader-review.googlesource.com/12709
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
parent 4e759e4e
...@@ -440,16 +440,34 @@ public: ...@@ -440,16 +440,34 @@ public:
vldrs(OpSd, OpAddress, Cond, TInfo); vldrs(OpSd, OpAddress, Cond, TInfo);
} }
void vldrq(const Operand *OpQd, const Operand *OpAddress,
CondARM32::Cond Cond, const TargetInfo &TInfo);
void vldrq(const Operand *OpQd, const Operand *OpAddress,
CondARM32::Cond Cond, const TargetLowering *Lowering) {
const TargetInfo TInfo(Lowering);
vldrq(OpQd, OpAddress, Cond, TInfo);
}
// ElmtSize = #bits in vector element. // ElmtSize = #bits in vector element.
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn, void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
const TargetInfo &TInfo); const TargetInfo &TInfo);
// Companion to vld1qr taking the same operand list. ElmtSize is a bit count;
// the sub-vector load instructions added in this change pass 32 or 64.
void vld1(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
const TargetInfo &TInfo);
void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn, void vld1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
const TargetLowering *Lowering) { const TargetLowering *Lowering) {
const TargetInfo TInfo(Lowering); const TargetInfo TInfo(Lowering);
vld1qr(ElmtSize, OpQd, OpRn, TInfo); vld1qr(ElmtSize, OpQd, OpRn, TInfo);
} }
// Convenience overload of vld1: derives the TargetInfo from Lowering and
// forwards to the TargetInfo-based overload.
void vld1(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
          const TargetLowering *Lowering) {
  const TargetInfo Info(Lowering);
  vld1(ElmtSize, OpQd, OpRn, Info);
}
// Qn[i] = Imm for all i in vector. Returns true iff Imm can be defined as an // Qn[i] = Imm for all i in vector. Returns true iff Imm can be defined as an
// Imm8 using AdvSIMDExpandImm(). // Imm8 using AdvSIMDExpandImm().
bool vmovqc(const Operand *OpQd, const ConstantInteger32 *Imm); bool vmovqc(const Operand *OpQd, const ConstantInteger32 *Imm);
...@@ -520,6 +538,14 @@ public: ...@@ -520,6 +538,14 @@ public:
void vmulqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQn, void vmulqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm); const Operand *OpQm);
// Integer vector multiply high. Per the change description this is realized
// as VMULL followed by VSHRN. Callers pass Unsigned=true for the unsigned (or
// unspecified-sign) form and false for the signed form.
void vmulh(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm, bool Unsigned);
// Integer vector multiply add pairwise. Per the change description this is
// realized as VMULL followed by VPADD.
void vmlap(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm);
// Float vector multiply. // Float vector multiply.
void vmulqf(const Operand *OpQd, const Operand *OpQn, const Operand *OpQm); void vmulqf(const Operand *OpQd, const Operand *OpQn, const Operand *OpQm);
...@@ -574,6 +600,15 @@ public: ...@@ -574,6 +600,15 @@ public:
vstrs(OpSd, OpAddress, Cond, TInfo); vstrs(OpSd, OpAddress, Cond, TInfo);
} }
void vstrq(const Operand *OpQd, const Operand *OpAddress,
CondARM32::Cond Cond, const TargetInfo &TInfo);
void vstrq(const Operand *OpQd, const Operand *OpAddress,
CondARM32::Cond Cond, const TargetLowering *Lowering) {
const TargetInfo TInfo(Lowering);
vstrq(OpQd, OpAddress, Cond, TInfo);
}
// ElmtSize = #bits in vector element. // ElmtSize = #bits in vector element.
void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress, void vst1qr(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
const TargetInfo &TInfo); const TargetInfo &TInfo);
...@@ -584,6 +619,15 @@ public: ...@@ -584,6 +619,15 @@ public:
vst1qr(ElmtSize, OpQd, OpRn, TInfo); vst1qr(ElmtSize, OpQd, OpRn, TInfo);
} }
// Store counterpart of vld1. ElmtSize is a bit count; the sub-vector store
// instruction added in this change passes its stored Size (32 or 64).
void vst1(size_t ElmtSize, const Operand *OpQd, const Operand *OpAddress,
          const TargetInfo &TInfo);

// Convenience overload: builds the TargetInfo from Lowering and forwards to
// the TargetInfo-based overload.
void vst1(size_t ElmtSize, const Operand *OpQd, const Operand *OpRn,
          const TargetLowering *Lowering) {
  const TargetInfo Info(Lowering);
  vst1(ElmtSize, OpQd, OpRn, Info);
}
void vsubd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm, void vsubd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
...@@ -603,6 +647,10 @@ public: ...@@ -603,6 +647,10 @@ public:
void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn); const Operand *OpQn);
// Integer vector packing with optional saturation. Packs the two source
// vectors OpQm and OpQn into OpQd; per the change description this is realized
// with two VQMOVN instructions. Saturating=false requests a plain (non
// saturating) pack; otherwise Unsigned picks unsigned vs. signed saturation.
void vqmovn2(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn, bool Unsigned, bool Saturating);
// Float vector subtract // Float vector subtract
void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
...@@ -732,6 +780,11 @@ private: ...@@ -732,6 +780,11 @@ private:
DRegListSize NumDRegs, size_t ElmtSize, IValueT Align, DRegListSize NumDRegs, size_t ElmtSize, IValueT Align,
const char *InstName); const char *InstName);
// Pattern 111100000D00nnnnddddss00aaaammmm | Opcode where Ddddd=Dd, nnnn=Rn,
// mmmm=Rm, ElmtSize in {8, 16, 32} and defines ss, and aa=Align.
// InstName identifies the instruction being emitted (presumably for
// diagnostics -- confirm against the definition).
void emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn, IValueT Rm,
size_t ElmtSize, IValueT Align, const char *InstName);
// Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond, // Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond,
// x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm. // x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm.
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn, void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
......
...@@ -1087,6 +1087,87 @@ template <> void InstARM32Vqsub::emitIAS(const Cfg *Func) const { ...@@ -1087,6 +1087,87 @@ template <> void InstARM32Vqsub::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
} }
// Integrated-assembler emission for the two-source vector pack.
// Only v8i16 and v4i32 sources are accepted; anything else is a fatal error.
// The instruction's sign type picks the flavour handed to the assembler:
//   FS_None     -> Unsigned=true,  Saturating=false (plain narrowing)
//   FS_Unsigned -> Unsigned=true,  Saturating=true
//   FS_Signed   -> Unsigned=false, Saturating=true
template <> void InstARM32Vqmovn2::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Operand *Lhs = getSrc(0);
  const Operand *Rhs = getSrc(1);
  const Type SrcTy = Lhs->getType();
  const Type DestTy = Dest->getType();
  if (SrcTy != IceType_v8i16 && SrcTy != IceType_v4i32)
    llvm::report_fatal_error("Vqmovn2 not defined on type " +
                             typeStdString(SrcTy));
  switch (Sign) {
  case InstARM32::FS_None:
    Asm->vqmovn2(typeElementType(DestTy), Dest, Lhs, Rhs,
                 /*Unsigned=*/true, /*Saturating=*/false);
    break;
  case InstARM32::FS_Unsigned:
    Asm->vqmovn2(typeElementType(DestTy), Dest, Lhs, Rhs,
                 /*Unsigned=*/true, /*Saturating=*/true);
    break;
  case InstARM32::FS_Signed:
    Asm->vqmovn2(typeElementType(DestTy), Dest, Lhs, Rhs,
                 /*Unsigned=*/false, /*Saturating=*/true);
    break;
  }
  assert(!Asm->needsTextFixup());
}
// Integrated-assembler emission for integer vector multiply-high.
// Only v8i16 sources are accepted; anything else is a fatal error. An
// unspecified sign (FS_None) is treated the same as FS_Unsigned.
template <> void InstARM32Vmulh::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Type SrcTy = getSrc(0)->getType();
  if (SrcTy != IceType_v8i16)
    llvm::report_fatal_error("Vmulh not defined on type " +
                             typeStdString(SrcTy));
  switch (Sign) {
  case InstARM32::FS_None: // defaults to unsigned.
  case InstARM32::FS_Unsigned:
    Asm->vmulh(typeElementType(SrcTy), Dest, getSrc(0), getSrc(1),
               /*Unsigned=*/true);
    break;
  case InstARM32::FS_Signed:
    Asm->vmulh(typeElementType(SrcTy), Dest, getSrc(0), getSrc(1),
               /*Unsigned=*/false);
    break;
  }
  assert(!Asm->needsTextFixup());
}
// Integrated-assembler emission for integer vector multiply-add-pairwise.
// Only v8i16 sources are accepted; anything else is a fatal error.
template <> void InstARM32Vmlap::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Operand *Lhs = getSrc(0);
  const Operand *Rhs = getSrc(1);
  const Type SrcTy = Lhs->getType();
  if (SrcTy != IceType_v8i16)
    llvm::report_fatal_error("Vmlap not defined on type " +
                             typeStdString(SrcTy));
  Asm->vmlap(typeElementType(SrcTy), Dest, Lhs, Rhs);
  assert(!Asm->needsTextFixup());
}
template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const { template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
...@@ -1336,6 +1417,14 @@ InstARM32Strex::InstARM32Strex(Cfg *Func, Variable *Dest, Variable *Value, ...@@ -1336,6 +1417,14 @@ InstARM32Strex::InstARM32Strex(Cfg *Func, Variable *Dest, Variable *Value,
addSource(Mem); addSource(Mem);
} }
// Builds a sub-vector store with two sources and no Dest: source 0 is the
// value being stored, source 1 is the memory operand. Size is remembered and
// later handed to the assembler as the vst1 element size.
InstARM32Vstr1::InstARM32Vstr1(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
                               CondARM32::Cond Predicate, SizeT Size)
    : InstARM32Pred(Func, InstARM32::Vstr1, 2, nullptr, Predicate),
      Size(Size) {
  addSource(Value);
  addSource(Mem);
}
InstARM32Trap::InstARM32Trap(Cfg *Func) InstARM32Trap::InstARM32Trap(Cfg *Func)
: InstARM32(Func, InstARM32::Trap, 0, nullptr) {} : InstARM32(Func, InstARM32::Trap, 0, nullptr) {}
...@@ -1654,6 +1743,8 @@ template <> const char *InstARM32Vsqrt::Opcode = "vsqrt"; ...@@ -1654,6 +1743,8 @@ template <> const char *InstARM32Vsqrt::Opcode = "vsqrt";
// Mov-like ops // Mov-like ops
template <> const char *InstARM32Ldr::Opcode = "ldr"; template <> const char *InstARM32Ldr::Opcode = "ldr";
template <> const char *InstARM32Ldrex::Opcode = "ldrex"; template <> const char *InstARM32Ldrex::Opcode = "ldrex";
// Opcode names for the sub-vector loads. Note that the emit() specializations
// for these two instructions print "vld1"/"vldr"/"ldr" chosen from the
// destination type rather than these strings.
template <> const char *InstARM32Vldr1d::Opcode = "vldr1d";
template <> const char *InstARM32Vldr1q::Opcode = "vldr1q";
// Three-addr ops // Three-addr ops
template <> const char *InstARM32Adc::Opcode = "adc"; template <> const char *InstARM32Adc::Opcode = "adc";
template <> const char *InstARM32Add::Opcode = "add"; template <> const char *InstARM32Add::Opcode = "add";
...@@ -1693,6 +1784,12 @@ template <> ...@@ -1693,6 +1784,12 @@ template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd"; const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd";
template <> template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub"; const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub";
// Opcode names for the vector intrinsic instructions: two-source pack
// (vqmovn2), multiply high (vmulh) and multiply add pairwise (vmlap).
template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vqmovn2>::Opcode = "vqmovn2";
template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vmulh>::Opcode = "vmulh";
template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vmlap>::Opcode = "vmlap";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls"; template <> const char *InstARM32Mls::Opcode = "mls";
...@@ -2154,6 +2251,62 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const { ...@@ -2154,6 +2251,62 @@ template <> void InstARM32Ldr::emit(const Cfg *Func) const {
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
} }
// Textual emission for the 32-bit sub-vector load. Does nothing unless dump
// support is built in. The mnemonic is picked from the destination type:
// "vld1" for vectors, "vldr" for scalar floats, "ldr" otherwise. V-style
// instructions print the predicate before the width suffix, plain loads print
// the width suffix first; vectors additionally get ".<element bits>".
template <> void InstARM32Vldr1d::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 1);
  assert(getDest()->hasReg());
  const Type DestTy = getDest()->getType();
  const bool VectorLoad = isVectorType(DestTy);
  const bool FloatLoad = isScalarFloatingType(DestTy);
  const char *Mnemonic = "ldr";
  if (VectorLoad)
    Mnemonic = "vld1";
  else if (FloatLoad)
    Mnemonic = "vldr";
  // Vector loads carry no width suffix.
  const char *Width = VectorLoad ? "" : getWidthString(DestTy);
  Str << "\t" << Mnemonic;
  if (VectorLoad || FloatLoad)
    Str << getPredicate() << Width;
  else
    Str << Width << getPredicate();
  if (VectorLoad)
    Str << "." << getVecElmtBitsize(DestTy);
  Str << "\t";
  getDest()->emit(Func);
  Str << ", ";
  getSrc(0)->emit(Func);
}
// Textual emission for the 64-bit sub-vector load. Does nothing unless dump
// support is built in. Output has the shape
//   \t<mnemonic><pred/width>[.<element bits>]\t<dest>, <src>
// where the mnemonic is "vld1" (vector), "vldr" (scalar float) or "ldr", and
// the predicate/width order depends on whether a V-style mnemonic was chosen.
template <> void InstARM32Vldr1q::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  assert(getSrcSize() == 1);
  assert(getDest()->hasReg());
  Variable *Target = getDest();
  const Type TargetTy = Target->getType();
  const bool OnVector = isVectorType(TargetTy);
  const bool OnScalarFloat = isScalarFloatingType(TargetTy);
  const char *Name = OnVector ? "vld1" : (OnScalarFloat ? "vldr" : "ldr");
  // No width suffix is printed for vector loads.
  const char *Suffix = OnVector ? "" : getWidthString(TargetTy);
  Str << "\t" << Name;
  const bool PredicateFirst = OnVector || OnScalarFloat;
  if (!PredicateFirst) {
    Str << Suffix << getPredicate();
  } else {
    Str << getPredicate() << Suffix;
  }
  if (OnVector) {
    Str << "." << getVecElmtBitsize(TargetTy);
  }
  Str << "\t";
  Target->emit(Func);
  Str << ", ";
  getSrc(0)->emit(Func);
}
template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const { template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
...@@ -2187,6 +2340,20 @@ template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const { ...@@ -2187,6 +2340,20 @@ template <> void InstARM32Ldr::emitIAS(const Cfg *Func) const {
} }
} }
// Integrated-assembler emission: a vld1 of the single source into Dest with
// an element size of 32 bits.
template <> void InstARM32Vldr1d::emitIAS(const Cfg *Func) const {
  assert(getSrcSize() == 1);
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  Asm->vld1(32, getDest(), getSrc(0), Func->getTarget());
}
// Integrated-assembler emission: a vld1 of the single source into Dest with
// an element size of 64 bits.
template <> void InstARM32Vldr1q::emitIAS(const Cfg *Func) const {
  assert(getSrcSize() == 1);
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  Asm->vld1(64, getDest(), getSrc(0), Func->getTarget());
}
template <> void InstARM32Ldrex::emit(const Cfg *Func) const { template <> void InstARM32Ldrex::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -2593,6 +2760,51 @@ void InstARM32Strex::dump(const Cfg *Func) const { ...@@ -2593,6 +2760,51 @@ void InstARM32Strex::dump(const Cfg *Func) const {
getSrc(0)->dump(Func); getSrc(0)->dump(Func);
} }
void InstARM32Vstr1::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Type Ty = getSrc(0)->getType();
const bool IsVectorStore = isVectorType(Ty);
const bool IsScalarFloat = isScalarFloatingType(Ty);
const char *Opcode =
IsVectorStore ? "vst1" : (IsScalarFloat ? "vstr" : "str");
Str << "\t" << Opcode;
const bool IsVInst = IsVectorStore || IsScalarFloat;
if (IsVInst) {
Str << getPredicate() << getWidthString(Ty);
} else {
Str << getWidthString(Ty) << getPredicate();
}
if (IsVectorStore)
Str << "." << getVecElmtBitsize(Ty);
Str << "\t";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
}
void InstARM32Vstr1::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 2);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Operand *Src0 = getSrc(0);
const Operand *Src1 = getSrc(1);
Asm->vst1(Size, Src0, Src1, Func->getTarget());
}
void InstARM32Vstr1::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Type Ty = getSrc(0)->getType();
dumpOpcodePred(Str, "str", Ty);
Str << " ";
getSrc(1)->dump(Func);
Str << ", ";
getSrc(0)->dump(Func);
}
void InstARM32Trap::emit(const Cfg *Func) const { void InstARM32Trap::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
...@@ -3166,10 +3378,14 @@ template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>; ...@@ -3166,10 +3378,14 @@ template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
// Explicit instantiations for the vector intrinsic instructions. Vqmovn2 and
// Vmulh are sign-aware; Vmlap is not.
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqmovn2>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vmulh>;
template class InstARM32ThreeAddrFP<InstARM32::Vmlap>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
template class InstARM32LoadBase<InstARM32::Ldrex>; template class InstARM32LoadBase<InstARM32::Ldrex>;
// Explicit instantiations for the sub-vector load instructions.
template class InstARM32LoadBase<InstARM32::Vldr1d>;
template class InstARM32LoadBase<InstARM32::Vldr1q>;
template class InstARM32TwoAddrGPR<InstARM32::Movt>; template class InstARM32TwoAddrGPR<InstARM32::Movt>;
template class InstARM32UnaryopGPR<InstARM32::Movw, false>; template class InstARM32UnaryopGPR<InstARM32::Movw, false>;
......
...@@ -435,18 +435,24 @@ public: ...@@ -435,18 +435,24 @@ public:
Vcvt, Vcvt,
Vdiv, Vdiv,
Veor, Veor,
Vldr1d,
Vldr1q,
Vmla, Vmla,
Vmlap,
Vmls, Vmls,
Vmrs, Vmrs,
Vmul, Vmul,
Vmulh,
Vmvn, Vmvn,
Vneg, Vneg,
Vorr, Vorr,
Vqadd, Vqadd,
Vqmovn2,
Vqsub, Vqsub,
Vshl, Vshl,
Vshr, Vshr,
Vsqrt, Vsqrt,
Vstr1,
Vsub Vsub
}; };
...@@ -1020,11 +1026,16 @@ using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; ...@@ -1020,11 +1026,16 @@ using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>; using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>; using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>; using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
// Vector intrinsic instructions: two-source pack, multiply high, and multiply
// add pairwise. The first two carry a sign type; Vmlap does not.
using InstARM32Vqmovn2 = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqmovn2>;
using InstARM32Vmulh = InstARM32ThreeAddrSignAwareFP<InstARM32::Vmulh>;
using InstARM32Vmlap = InstARM32ThreeAddrFP<InstARM32::Vmlap>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>; using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
// Sub-vector loads; their emitIAS specializations issue vld1 with a 32-bit
// (Vldr1d) or 64-bit (Vldr1q) element size.
using InstARM32Vldr1d = InstARM32LoadBase<InstARM32::Vldr1d>;
using InstARM32Vldr1q = InstARM32LoadBase<InstARM32::Vldr1q>;
/// MovT leaves the bottom bits alone so dest is also a source. This helps /// MovT leaves the bottom bits alone so dest is also a source. This helps
/// indicate that a previous MovW setting dest is not dead code. /// indicate that a previous MovW setting dest is not dead code.
using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>; using InstARM32Movt = InstARM32TwoAddrGPR<InstARM32::Movt>;
...@@ -1336,6 +1347,33 @@ private: ...@@ -1336,6 +1347,33 @@ private:
OperandARM32Mem *Mem, CondARM32::Cond Predicate); OperandARM32Mem *Mem, CondARM32::Cond Predicate);
}; };
/// Sub-vector store instruction. It's important for liveness that there is no
/// Dest operand (OperandARM32Mem instead of Dest Variable).
/// The stored value is source 0 and the memory operand is source 1.
class InstARM32Vstr1 final : public InstARM32Pred {
InstARM32Vstr1() = delete;
InstARM32Vstr1(const InstARM32Vstr1 &) = delete;
InstARM32Vstr1 &operator=(const InstARM32Vstr1 &) = delete;
public:
/// Value must be a register.
/// Size is forwarded to the assembler's vst1 as the element size; the
/// lowering helpers in this change pass 32 or 64.
static InstARM32Vstr1 *create(Cfg *Func, Variable *Value,
OperandARM32Mem *Mem, CondARM32::Cond Predicate,
SizeT Size) {
// Placement-new into storage obtained from the Cfg's allocator.
return new (Func->allocate<InstARM32Vstr1>())
InstARM32Vstr1(Func, Value, Mem, Predicate, Size);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override;
static bool classof(const Inst *Instr) { return isClassof(Instr, Vstr1); }
private:
InstARM32Vstr1(Cfg *Func, Variable *Value, OperandARM32Mem *Mem,
CondARM32::Cond Predicate, SizeT Size);
/// Store size handed to vst1 by emitIAS().
SizeT Size;
};
class InstARM32Trap : public InstARM32 { class InstARM32Trap : public InstARM32 {
InstARM32Trap() = delete; InstARM32Trap() = delete;
InstARM32Trap(const InstARM32Trap &) = delete; InstARM32Trap(const InstARM32Trap &) = delete;
...@@ -1630,6 +1668,8 @@ private: ...@@ -1630,6 +1668,8 @@ private:
template <> void InstARM32Ldr::emit(const Cfg *Func) const; template <> void InstARM32Ldr::emit(const Cfg *Func) const;
template <> void InstARM32Movw::emit(const Cfg *Func) const; template <> void InstARM32Movw::emit(const Cfg *Func) const;
template <> void InstARM32Movt::emit(const Cfg *Func) const; template <> void InstARM32Movt::emit(const Cfg *Func) const;
// Declare the sub-vector load emit() specializations that are defined in the
// implementation file.
template <> void InstARM32Vldr1d::emit(const Cfg *Func) const;
template <> void InstARM32Vldr1q::emit(const Cfg *Func) const;
} // end of namespace ARM32 } // end of namespace ARM32
} // end of namespace Ice } // end of namespace Ice
......
...@@ -5331,23 +5331,75 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -5331,23 +5331,75 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return; return;
} }
case Intrinsics::LoadSubVector: { case Intrinsics::LoadSubVector: {
UnimplementedLoweringError(this, Instr); assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
"LoadSubVector second argument must be a constant");
Variable *Dest = Instr->getDest();
Type Ty = Dest->getType();
auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
Operand *Addr = Instr->getArg(0);
OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
doMockBoundsCheck(Src);
if (Dest->isRematerializable()) {
Context.insert<InstFakeDef>(Dest);
return;
}
auto *T = makeReg(Ty);
switch (SubVectorSize->getValue()) {
case 4:
_vldr1d(T, Src);
break;
case 8:
_vldr1q(T, Src);
break;
default:
Func->setError("Unexpected size for LoadSubVector");
return;
}
_mov(Dest, T); // FIXME: necessary?
return; return;
} }
case Intrinsics::StoreSubVector: { case Intrinsics::StoreSubVector: {
UnimplementedLoweringError(this, Instr); assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
"StoreSubVector third argument must be a constant");
auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
Variable *Value = legalizeToReg(Instr->getArg(0));
Operand *Addr = Instr->getArg(1);
OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
doMockBoundsCheck(NewAddr);
Value = legalizeToReg(Value);
switch (SubVectorSize->getValue()) {
case 4:
_vstr1d(Value, NewAddr);
break;
case 8:
_vstr1q(Value, NewAddr);
break;
default:
Func->setError("Unexpected size for StoreSubVector");
return;
}
return; return;
} }
case Intrinsics::MultiplyAddPairs: { case Intrinsics::MultiplyAddPairs: {
UnimplementedLoweringError(this, Instr); Variable *Src0 = legalizeToReg(Instr->getArg(0));
return; Variable *Src1 = legalizeToReg(Instr->getArg(1));
} Variable *T = makeReg(DestTy);
case Intrinsics::MultiplyHighSigned: { _vmlap(T, Src0, Src1);
UnimplementedLoweringError(this, Instr); _mov(Dest, T);
return; return;
} }
case Intrinsics::MultiplyHighSigned:
case Intrinsics::MultiplyHighUnsigned: { case Intrinsics::MultiplyHighUnsigned: {
UnimplementedLoweringError(this, Instr); bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
Variable *Src0 = legalizeToReg(Instr->getArg(0));
Variable *Src1 = legalizeToReg(Instr->getArg(1));
Variable *T = makeReg(DestTy);
_vmulh(T, Src0, Src1, Unsigned);
_mov(Dest, T);
return; return;
} }
case Intrinsics::Nearbyint: { case Intrinsics::Nearbyint: {
...@@ -5372,12 +5424,15 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -5372,12 +5424,15 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case Intrinsics::VectorPackSigned: { case Intrinsics::VectorPackSigned:
UnimplementedLoweringError(this, Instr);
return;
}
case Intrinsics::VectorPackUnsigned: { case Intrinsics::VectorPackUnsigned: {
UnimplementedLoweringError(this, Instr); bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
bool Saturating = true;
Variable *Src0 = legalizeToReg(Instr->getArg(0));
Variable *Src1 = legalizeToReg(Instr->getArg(1));
Variable *T = makeReg(DestTy);
_vqmovn2(T, Src0, Src1, Unsigned, Saturating);
_mov(Dest, T);
return; return;
} }
default: // UnknownIntrinsic default: // UnknownIntrinsic
......
...@@ -888,18 +888,33 @@ protected: ...@@ -888,18 +888,33 @@ protected:
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) { void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Veor>(Dest, Src0, Src1); Context.insert<InstARM32Veor>(Dest, Src0, Src1);
} }
// Inserts a Vldr1d (sub-vector load) of Addr into Dest. The LoadSubVector
// lowering uses this for a sub-vector size of 4.
void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
}
// Inserts a Vldr1q (sub-vector load) of Addr into Dest. The LoadSubVector
// lowering uses this for a sub-vector size of 8.
void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
}
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) { void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vmrs>(Pred); Context.insert<InstARM32Vmrs>(Pred);
} }
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmla>(Dest, Src0, Src1); Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
} }
// Inserts a Vmlap (integer vector multiply add pairwise) instruction.
void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
}
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmls>(Dest, Src0, Src1); Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
} }
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1); Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
} }
// Inserts a Vmulh (integer vector multiply high) instruction and tags it
// with the requested signedness.
void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
  auto *MulHigh = Context.insert<InstARM32Vmulh>(Dest, Src0, Src1);
  MulHigh->setSignType(Unsigned ? InstARM32::FS_Unsigned
                                : InstARM32::FS_Signed);
}
void _vmvn(Variable *Dest, Variable *Src0) { void _vmvn(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL); Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
} }
...@@ -914,6 +929,13 @@ protected: ...@@ -914,6 +929,13 @@ protected:
Context.insert<InstARM32Vqadd>(Dest, Src0, Src1) Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
} }
// Inserts a Vqmovn2 (two-source vector pack) instruction. The sign type
// encodes both flags: FS_None when not saturating, otherwise FS_Unsigned or
// FS_Signed according to Unsigned.
void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
              bool Saturating) {
  auto SignKind = InstARM32::FS_None;
  if (Saturating) {
    if (Unsigned)
      SignKind = InstARM32::FS_Unsigned;
    else
      SignKind = InstARM32::FS_Signed;
  }
  auto *Pack = Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1);
  Pack->setSignType(SignKind);
}
void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) { void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
Context.insert<InstARM32Vqsub>(Dest, Src0, Src1) Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed); ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
...@@ -933,6 +955,14 @@ protected: ...@@ -933,6 +955,14 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred); Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
} }
// Inserts a Vstr1 (sub-vector store) of Value to Addr with Size 32. The
// StoreSubVector lowering uses this for a sub-vector size of 4.
void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
}
// Inserts a Vstr1 (sub-vector store) of Value to Addr with Size 64. The
// StoreSubVector lowering uses this for a sub-vector size of 8.
void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
}
void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) { void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vsub>(Dest, Src0, Src1); Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment