Commit 15e77d46 by John Porto

Subzero. ARM32. Vector shifts.

BUG= R=kschimpf@google.com, stichnot@chromium.org Review URL: https://codereview.chromium.org/1881623002 .
parent 3018cf2b
......@@ -1289,20 +1289,20 @@ void Assembler::vmulqi(OperandSize sz,
// Emits VMUL.F32 <Qd>, <Qn>, <Qm> (SIMD per-lane single-precision multiply).
void Assembler::vmulqs(QRegister qd, QRegister qn, QRegister qm) {
// kSWord selects the .f32 element size for the qqq SIMD emitter.
EmitSIMDqqq(B24 | B11 | B10 | B8 | B4, kSWord, qd, qn, qm);
}
#endif
// Moved to ARM32::AssemblerARM32::vshlqi().
// Emits signed VSHL (register): per-lane qd = qm << qn, element size `sz`.
void Assembler::vshlqi(OperandSize sz,
QRegister qd, QRegister qm, QRegister qn) {
// NOTE: qn/qm are deliberately swapped relative to the parameter order;
// the emitter takes registers in encoding-field order (Qn before Qm).
EmitSIMDqqq(B25 | B10, sz, qd, qn, qm);
}
// Moved to ARM32::AssemblerARM32::vshlqu().
// Emits unsigned VSHL (register): per-lane qd = qm << qn, element size `sz`.
// Differs from vshlqi only in the B24 bit (the U/sign bit of the encoding).
void Assembler::vshlqu(OperandSize sz,
QRegister qd, QRegister qm, QRegister qn) {
// NOTE: qn/qm are deliberately swapped relative to the parameter order;
// the emitter takes registers in encoding-field order (Qn before Qm).
EmitSIMDqqq(B25 | B24 | B10, sz, qd, qn, qm);
}
#if 0
// Moved to ARM32::AssemblerARM32::veorq()
void Assembler::veorq(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B8 | B4, kByte, qd, qn, qm);
......@@ -1345,12 +1345,13 @@ void Assembler::vabsqs(QRegister qd, QRegister qm) {
EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8, kSWord,
qd, Q0, qm);
}
#endif
// Moved to Arm32::AssemblerARM32::vnegqs().
// Emits VNEG.F32 <Qd>, <Qm> (SIMD per-lane single-precision negate).
void Assembler::vnegqs(QRegister qd, QRegister qm) {
// Q0 fills the unused Qn slot of the qqq emitter; VNEG is a two-operand op.
EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8 | B7, kSWord,
qd, Q0, qm);
}
#endif
void Assembler::vrecpeqs(QRegister qd, QRegister qm) {
......
......@@ -693,10 +693,10 @@ class Assembler : public ValueObject {
void vmulqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to ARM32::AssemblerARM32::vmulqf().
void vmulqs(QRegister qd, QRegister qn, QRegister qm);
#endif
// Moved to ARM32::AssemblerARM32::vshlqi().
void vshlqi(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
// Moved to ARM32::AssemblerARM32::vshlqu().
void vshlqu(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
#if 0
// Moved to Arm32::AssemblerARM32::vmlas()
void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlad()
......@@ -749,8 +749,9 @@ class Assembler : public ValueObject {
#endif
void vnegs(SRegister sd, SRegister sm, Condition cond = AL);
void vnegd(DRegister dd, DRegister dm, Condition cond = AL);
void vnegqs(QRegister qd, QRegister qm);
#if 0
// Moved to ARM32::AssemblerARM32::vnegqs().
void vnegqs(QRegister qd, QRegister qm);
// Moved to ARM32::AssemblerARM32::vsqrts().
void vsqrts(SRegister sd, SRegister sm, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vsqrts().
......
......@@ -3056,6 +3056,30 @@ void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn,
emitSIMDqqqBase(VmulqfOpcode, OpQd, OpQn, OpQm, IsFloatTy, Vmulqf);
}
void AssemblerARM32::vnegqs(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm) {
// VNEG - ARM section A8.8.355, encoding A1:
// vneg.<dt> <Qd>, <Qm>
//
// 111100111D11ss01dddd0F111QM0mmmm where Dddd=Qd, and Mmmm=Qm, and:
// * dt=s8 -> 00=ss, 0=F
// * dt=s16 -> 01=ss, 0=F
// * dt=s32 -> 10=ss, 0=F
// * dt=f32 -> 10=ss, 1=F
constexpr const char *Vneg = "vneg";
constexpr IValueT VnegOpcode = B24 | B23 | B21 | B20 | B16 | B9 | B8 | B7;
const IValueT Qd = encodeQRegister(OpQd, "Qd", Vneg);
// VNEG has no second source; the emitter's Qn slot is filled with q0.
constexpr IValueT Qn = 0;
const IValueT Qm = encodeQRegister(OpQm, "Qm", Vneg);
constexpr bool UseQRegs = true;
// The ss element-size field occupies bits 19:18 of the encoding.
constexpr IValueT ElmtShift = 18;
const IValueT ElmtSize = encodeElmtType(ElmtTy);
assert(Utils::IsUint(2, ElmtSize));
// isFloatingType(ElmtTy) selects the F bit (s32 vs f32 variant).
emitSIMDBase(VnegOpcode | (ElmtSize << ElmtShift), mapQRegToDReg(Qd),
mapQRegToDReg(Qn), mapQRegToDReg(Qm), UseQRegs,
isFloatingType(ElmtTy));
}
void AssemblerARM32::vorrq(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) {
// VORR (register) - ARM section A8.8.360, encoding A1:
......@@ -3229,6 +3253,34 @@ void AssemblerARM32::vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
emitVStackOp(Cond, VpushOpcode, OpBaseReg, NumConsecRegs);
}
void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm, const Operand *OpQn) {
  // VSHL (register) - ARM section A8.8.396, encoding A1:
  //   vshl.<dt> <Qd>, <Qm>, <Qn>
  //
  // 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn.
  // Signed variant: U=0; quadword operation: Q=1. The emitter takes its
  // register arguments in encoding-field order, hence Qn before Qm below.
  constexpr const char *OpName = "vshl";
  constexpr IValueT SignedVshlOpcode = B10 | B6;
  assert(isScalarIntegerType(ElmtTy) &&
         "vshl expects vector with integer element type");
  emitSIMDqqq(SignedVshlOpcode, ElmtTy, OpQd, OpQn, OpQm, OpName);
}
void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm, const Operand *OpQn) {
  // VSHL (register) - ARM section A8.8.396, encoding A1:
  //   vshl.<dt> <Qd>, <Qm>, <Qn>
  //
  // 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn.
  // Unsigned variant: U=1 (bit 24); quadword operation: Q=1. The emitter
  // takes its register arguments in encoding-field order, hence Qn before Qm.
  constexpr const char *OpName = "vshl";
  constexpr IValueT UnsignedVshlOpcode = B24 | B10 | B6;
  assert(isScalarIntegerType(ElmtTy) &&
         "vshl expects vector with integer element type");
  emitSIMDqqq(UnsignedVshlOpcode, ElmtTy, OpQd, OpQn, OpQm, OpName);
}
void AssemblerARM32::vsqrtd(const Operand *OpDd, const Operand *OpDm,
CondARM32::Cond Cond) {
// VSQRT - ARM section A8.8.401, encoding A1:
......
......@@ -495,6 +495,8 @@ public:
void vmuls(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
void vnegqs(Type ElmtTy, const Operand *OpQd, const Operand *OpQm);
void vorrq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
......@@ -503,6 +505,12 @@ public:
void vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond);
void vshlqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vshlqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond);
void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
......
......@@ -70,24 +70,25 @@
// the # of offset bits allowed as part of an addressing mode (for sign or zero
// extending load/stores).
#define ICETYPEARM32_TABLE \
/* tag, element type, int_width, vec_width, addr bits sext, zext, \
reg-reg addr allowed, shift allowed, */ \
X(IceType_void, IceType_void, "" , "" , 0 , 0 , 0, 0) \
X(IceType_i1, IceType_void, "b", "" , 8 , 12, 1, 1) \
X(IceType_i8, IceType_void, "b", "" , 8 , 12, 1, 1) \
X(IceType_i16, IceType_void, "h", "" , 8 , 8 , 1, 0) \
X(IceType_i32, IceType_void, "" , "" , 12, 12, 1, 1) \
X(IceType_i64, IceType_void, "d", "" , 12, 12, 1, 1) \
X(IceType_f32, IceType_void, "" , ".f32", 8, 8 , 0, 0) \
X(IceType_f64, IceType_void, "" , ".f64", 8, 8 , 0, 0) \
X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \
X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \
X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \
X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \
X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \
X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \
X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0 , 1, 0)
//#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr)
/* tag, element type, int_width, fp_width, uvec_width, svec_width, \
addr bits sext, zext, reg-reg addr allowed, shift allowed, */ \
X(IceType_void, IceType_void, "" , "" , "" , "" , 0 , 0 , 0, 0) \
X(IceType_i1, IceType_void, "b", "" , "" , "" , 8 , 12, 1, 1) \
X(IceType_i8, IceType_void, "b", "" , "" , "" , 8 , 12, 1, 1) \
X(IceType_i16, IceType_void, "h", "" , "" , "" , 8 , 8 , 1, 0) \
X(IceType_i32, IceType_void, "" , "" , "" , "" , 12, 12, 1, 1) \
X(IceType_i64, IceType_void, "d", "" , "" , "" , 12, 12, 1, 1) \
X(IceType_f32, IceType_void, "" , ".f32", "" , "" , 8, 8 , 0, 0) \
X(IceType_f64, IceType_void, "" , ".f64", "" , "" , 8, 8 , 0, 0) \
X(IceType_v4i1, IceType_i32 , "" , ".i32", ".u32", ".s32", 0 , 0 , 1, 0) \
X(IceType_v8i1, IceType_i16 , "" , ".i16", ".u16", ".s16", 0 , 0 , 1, 0) \
X(IceType_v16i1, IceType_i8 , "" , ".i8" , ".u8" , ".s8" , 0 , 0 , 1, 0) \
X(IceType_v16i8, IceType_i8 , "" , ".i8" , ".u8" , ".s8" , 0 , 0 , 1, 0) \
X(IceType_v8i16, IceType_i16 , "" , ".i16", ".u16", ".s16", 0 , 0 , 1, 0) \
X(IceType_v4i32, IceType_i32 , "" , ".i32", ".u32", ".s32", 0 , 0 , 1, 0) \
X(IceType_v4f32, IceType_f32 , "" , ".f32", ".f32", ".f32", 0 , 0 , 1, 0)
//#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,
// ubits, rraddr, shaddr)
// Shifter types for Data-processing operands as defined in section A5.1.2.
#define ICEINSTARM32SHIFT_TABLE \
......
......@@ -435,15 +435,15 @@ public:
Vmls,
Vmrs,
Vmul,
Vneg,
Vorr,
Vshl,
Vsqrt,
Vsub
};
static constexpr size_t InstSize = sizeof(uint32_t);
static const char *getWidthString(Type Ty);
static const char *getVecWidthString(Type Ty);
static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond);
/// Called inside derived methods emit() to communicate that multiple
......@@ -452,11 +452,20 @@ public:
/// implemented.
void startNextInst(const Cfg *Func) const;
/// FPSign is used for certain vector instructions (particularly, right
/// shifts) that require an operand sign specification.
enum FPSign {
FS_None, ///< Instruction carries no sign suffix.
FS_Signed, ///< Emit the signed form (e.g. vshl.sNN) — see emit*FP(Sign, ...).
FS_Unsigned, ///< Emit the unsigned form (e.g. vshl.uNN).
};
/// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr,
const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Instr,
const Cfg *Func);
/// @{
static void emitThreeAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
/// @}
void dump(const Cfg *Func) const override;
......@@ -495,8 +504,8 @@ public:
/// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func, bool NeedsWidthSuffix);
static void emitUnaryopFP(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func);
static void emitUnaryopFP(const char *Opcode, FPSign Sign,
const InstARM32Pred *Instr, const Cfg *Func);
static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Instr,
......@@ -573,7 +582,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitUnaryopFP(Opcode, this, Func);
emitUnaryopFP(Opcode, Sign, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -588,16 +597,39 @@ public:
}
static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
private:
protected:
InstARM32UnaryopFP(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, K, 1, Dest, Predicate) {
addSource(Src);
}
FPSign Sign = FS_None;
static const char *Opcode;
};
/// A unary vector/FP instruction whose emitted mnemonic depends on an
/// operand-sign specification (FPSign), e.g. vneg.s32. The sign defaults to
/// FS_None (inherited from InstARM32UnaryopFP) and is set via setSignType().
template <InstARM32::InstKindARM32 K>
class InstARM32UnaryopSignAwareFP : public InstARM32UnaryopFP<K> {
InstARM32UnaryopSignAwareFP() = delete;
InstARM32UnaryopSignAwareFP(const InstARM32UnaryopSignAwareFP &) = delete;
InstARM32UnaryopSignAwareFP &
operator=(const InstARM32UnaryopSignAwareFP &) = delete;

public:
/// Creates the instruction; callers then pick the sign with setSignType().
static InstARM32UnaryopSignAwareFP *
create(Cfg *Func, Variable *Dest, Variable *Src, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32UnaryopSignAwareFP>())
InstARM32UnaryopSignAwareFP(Func, Dest, Src, Predicate);
}
void emitIAS(const Cfg *Func) const override;
/// Selects which signedness suffix the emitters use for this instruction.
void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }

private:
InstARM32UnaryopSignAwareFP(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate)
: InstARM32UnaryopFP<K>(Func, Dest, Src, Predicate) {}
};
/// Instructions of the form x := x op y.
template <InstARM32::InstKindARM32 K>
class InstARM32TwoAddrGPR : public InstARM32Pred {
......@@ -748,7 +780,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitThreeAddrFP(Opcode, this, Func);
emitThreeAddrFP(Opcode, Sign, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -762,9 +794,10 @@ public:
}
static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
private:
InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1)
protected:
FPSign Sign = FS_None;
InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Operand *Src1)
: InstARM32(Func, K, 2, Dest) {
addSource(Src0);
addSource(Src1);
......@@ -773,6 +806,31 @@ private:
static const char *Opcode;
};
/// A three-address vector/FP instruction whose emitted mnemonic depends on an
/// operand-sign specification (FPSign), e.g. vshl.s32 vs vshl.u32. The sign
/// defaults to FS_None (inherited) and is chosen via setSignType().
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrSignAwareFP : public InstARM32ThreeAddrFP<K> {
InstARM32ThreeAddrSignAwareFP() = delete;
InstARM32ThreeAddrSignAwareFP(const InstARM32ThreeAddrSignAwareFP &) = delete;
InstARM32ThreeAddrSignAwareFP &
operator=(const InstARM32ThreeAddrSignAwareFP &) = delete;

public:
/// Create a vector/FP binary-op instruction like vadd, and vsub. Everything
/// must be a register.
static InstARM32ThreeAddrSignAwareFP *create(Cfg *Func, Variable *Dest,
Variable *Src0, Variable *Src1) {
return new (Func->allocate<InstARM32ThreeAddrSignAwareFP>())
InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
}
void emitIAS(const Cfg *Func) const override;
/// Selects which signedness suffix the emitters use for this instruction.
void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }

private:
InstARM32ThreeAddrSignAwareFP(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1)
: InstARM32ThreeAddrFP<K>(Func, Dest, Src0, Src1) {}
};
/// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrGPR : public InstARM32Pred {
......@@ -840,7 +898,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitFourAddrFP(Opcode, this, Func);
emitFourAddrFP(Opcode, Sign, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -864,6 +922,7 @@ private:
addSource(Src1);
}
FPSign Sign = FS_None;
static const char *Opcode;
};
......@@ -931,7 +990,9 @@ using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......
......@@ -454,12 +454,9 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
switch (Op) {
default:
break;
case InstArithmetic::Ashr:
case InstArithmetic::Fdiv:
case InstArithmetic::Frem:
case InstArithmetic::Lshr:
case InstArithmetic::Sdiv:
case InstArithmetic::Shl:
case InstArithmetic::Srem:
case InstArithmetic::Udiv:
case InstArithmetic::Urem:
......@@ -1960,7 +1957,8 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
// For now, we don't handle address modes with Relocatables.
namespace {
// MemTraits contains per-type valid address mode information.
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
ICETYPEARM32_TABLE
#undef X
......@@ -1971,7 +1969,8 @@ static const struct {
bool CanHaveIndex;
bool CanHaveShiftedIndex;
} MemTraits[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
{ (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \
,
ICETYPEARM32_TABLE
......@@ -3120,15 +3119,18 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
UnimplementedLoweringError(this, Instr);
return;
// Explicitly whitelist vector instructions we have implemented/enabled.
case InstArithmetic::Fadd:
case InstArithmetic::Add:
case InstArithmetic::Fsub:
case InstArithmetic::Sub:
case InstArithmetic::And:
case InstArithmetic::Or:
case InstArithmetic::Xor:
case InstArithmetic::Ashr:
case InstArithmetic::Fadd:
case InstArithmetic::Fmul:
case InstArithmetic::Fsub:
case InstArithmetic::Lshr:
case InstArithmetic::Mul:
case InstArithmetic::Or:
case InstArithmetic::Shl:
case InstArithmetic::Sub:
case InstArithmetic::Xor:
break;
}
}
......@@ -3448,26 +3450,46 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
}
case InstArithmetic::Shl: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
_lsl(T, Src0R, Src1R);
if (!isVectorType(T->getType())) {
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
_lsl(T, Src0R, Src1R);
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
_vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
}
_mov(Dest, T);
return;
}
case InstArithmetic::Lshr: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
if (DestTy != IceType_i32) {
_uxt(Src0R, Src0R);
if (!isVectorType(T->getType())) {
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
if (DestTy != IceType_i32) {
_uxt(Src0R, Src0R);
}
_lsr(T, Src0R, Src1R);
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
auto *Src1RNeg = makeReg(Src1R->getType());
_vneg(Src1RNeg, Src1R);
_vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
}
_lsr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
_mov(Dest, T);
return;
}
case InstArithmetic::Ashr: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
if (DestTy != IceType_i32) {
_sxt(Src0R, Src0R);
if (!isVectorType(T->getType())) {
if (DestTy != IceType_i32) {
_sxt(Src0R, Src0R);
}
_asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
auto *Src1RNeg = makeReg(Src1R->getType());
_vneg(Src1RNeg, Src1R);
_vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
}
_asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
_mov(Dest, T);
return;
}
......
......@@ -884,9 +884,16 @@ protected:
// Inserts a vmul instruction: Dest = Src0 * Src1.
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
}
// Inserts a vneg instruction: Dest = -Src0. Always emitted as the signed
// form (e.g. vneg.sNN) since that is the variant this lowering needs.
void _vneg(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
->setSignType(InstARM32::FS_Signed);
}
// Inserts a vorr instruction: Dest = Src0 | Src1.
void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
}
// Inserts a vshl instruction: Dest = Src0 << Src1 (per lane). Returns the
// instruction so the caller can choose the signedness via setSignType().
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
}
void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
......
......@@ -33,6 +33,7 @@ entry:
; ASM-NEXT: asr r0, r0, #23
; DIS-NEXT: 0: e1a00bc0
; IASM-NOT: asr
; IASM-NEXT: .byte 0xc0
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
......@@ -54,6 +55,7 @@ entry:
; ASM-NEXT: asr r0, r0, r1
; DIS-NEXT: 10: e1a00150
; IASM-NOT: asr
; IASM-NEXT: .byte 0x50
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
......@@ -71,14 +73,12 @@ entry:
%v = ashr <4 x i32> %a, %b
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; DIS: 28: e1a00150
; DIS: 38: e1a00150
; DIS: 48: e1a00150
; DIS: 58: e1a00150
; ASM: vneg.s32 q1, q1
; ASM-NEXT: vshl.s32 q0, q0, q1
; DIS: 20: f3b923c2
; DIS: 24: f2220440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <4 x i32> %v
}
......@@ -90,14 +90,12 @@ entry:
%v = ashr <8 x i16> %a, %b
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: vneg.s16 q1, q1
; ASM-NEXT: vshl.s16 q0, q0, q1
; DIS: 30: f3b523c2
; DIS: 34: f2120440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <8 x i16> %v
}
......@@ -109,22 +107,12 @@ entry:
%v = ashr <16 x i8> %a, %b
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: vneg.s8 q1, q1
; ASM-NEXT: vshl.s8 q0, q0, q1
; DIS: 40: f3b123c2
; DIS: 44: f2020440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <16 x i8> %v
}
......@@ -33,10 +33,7 @@ entry:
; ASM-NEXT: lsl r0, r0, #23
; DIS-NEXT: 0: e1a00b80
; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsl
ret i32 %shl
}
......@@ -54,10 +51,7 @@ entry:
; ASM-NEXT: lsl r0, r0, r1
; DIS-NEXT: 10: e1a00110
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsl
ret i32 %shl
}
......@@ -73,11 +67,9 @@ entry:
%shl = shl <4 x i32> %a, %b
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; DIS: 28: e1a00110
; ASM: vshl.u32 q0, q0, q1
; DIS: 20: f3220440
; IASM-NOT: vshl
ret <4 x i32> %shl
}
......@@ -89,14 +81,9 @@ entry:
%v = shl <8 x i16> %a, %b
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: vshl.u16 q0, q0, q1
; DIS: 30: f3120440
; IASM-NOT: vshl
ret <8 x i16> %v
}
......@@ -108,22 +95,9 @@ entry:
%v = shl <16 x i8> %a, %b
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: vshl.u8 q0, q0, q1
; DIS: 40: f3020440
; IASM-NOT: vshl
ret <16 x i8> %v
}
......@@ -33,10 +33,7 @@ entry:
; ASM-NEXT: lsr r0, r0, #23
; DIS-NEXT: 0: e1a00ba0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsr
ret i32 %v
}
......@@ -54,10 +51,7 @@ entry:
; ASM-NEXT: lsr r0, r0, r1
; DIS-NEXT: 10: e1a00130
; IASM-NEXT: .byte 0x30
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsr
ret i32 %v
}
......@@ -73,11 +67,12 @@ entry:
%v = lshr <4 x i32> %a, %b
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; DIS: 28: e1a00130
; ASM: vneg.s32 q1, q1
; ASM-NEXT: vshl.u32 q0, q0, q1
; DIS: 20: f3b923c2
; DIS: 24: f3220440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <4 x i32> %v
}
......@@ -89,14 +84,12 @@ entry:
%v = lshr <8 x i16> %a, %b
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: vneg.s16 q1, q1
; ASM-NEXT: vshl.u16 q0, q0, q1
; DIS: 30: f3b523c2
; DIS: 34: f3120440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <8 x i16> %v
}
......@@ -108,22 +101,12 @@ entry:
%v = lshr <16 x i8> %a, %b
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: vneg.s8 q1, q1
; ASM-NEXT: vshl.u8 q0, q0, q1
; DIS: 40: f3b123c2
; DIS: 44: f3020440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <16 x i8> %v
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment