Commit 6de32b21 by Karl Schimpf

Add insert/extract element to the integrated ARM assembler.

parent cc69fa29
......@@ -683,7 +683,8 @@ void Assembler::vmovrrs(Register rt, Register rt2, SRegister sm,
Emit(encoding);
}
#if 0
// Moved to ARM32::AssemblerARM32::vmovqir().
void Assembler::vmovdr(DRegister dn, int i, Register rt, Condition cond) {
ASSERT(TargetCPUFeatures::vfp_supported());
ASSERT((i == 0) || (i == 1));
......@@ -701,7 +702,6 @@ void Assembler::vmovdr(DRegister dn, int i, Register rt, Condition cond) {
Emit(encoding);
}
#if 0
// Moved to ARM32::AssemblerARM32::vmovdrr().
void Assembler::vmovdrr(DRegister dm, Register rt, Register rt2,
Condition cond) {
......
......@@ -630,9 +630,8 @@ class Assembler : public ValueObject {
void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vmovrrd().
void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL);
#endif
// Moved to ARM32::AssemblerARM32::vmovqir().
void vmovdr(DRegister dd, int i, Register rt, Condition cond = AL);
#if 0
// Moved to ARM32::AssemblerARM32::vmovss().
void vmovs(SRegister sd, SRegister sm, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vmovdd().
......@@ -1409,6 +1408,7 @@ class Assembler : public ValueObject {
// ARM32::AssemblerARM32::veord()
// ARM32::AssemblerARM32::vld1qr()
// ARM32::AssemblerARM32::vst1qr()
// ARM32::AssemblerARM32::vmovrqi()
#endif
DISALLOW_ALLOCATION();
......
......@@ -158,7 +158,7 @@ IValueT encodeElmtType(Type ElmtTy) {
return 3;
default:
llvm::report_fatal_error("SIMD op: Don't understand element type " +
std::string(typeString(ElmtTy)));
typeIceString(ElmtTy));
}
}
......@@ -213,7 +213,17 @@ IValueT getEncodedQRegNum(const Variable *Var) {
return RegARM32::getEncodedQReg(Var->getRegNum());
}
// Maps an encoded Q register number to the encoded number of its first D
// register (Dn = 2 * Qn). Asserts that the result is a valid D register
// encoding.
IValueT mapQRegToDReg(IValueT EncodedQReg) {
  const IValueT DReg = EncodedQReg << 1;
  assert(DReg < RegARM32::getNumDRegs());
  return DReg;
}
// Maps an encoded Q register number to the encoded number of its first S
// register (Sn = 4 * Qn). Asserts that the result is a valid S register
// encoding.
IValueT mapQRegToSReg(IValueT EncodedQReg) {
  const IValueT FirstSReg = EncodedQReg << 2;
  assert(FirstSReg < RegARM32::getNumSRegs());
  return FirstSReg;
}
// Returns Y, the least significant bit of a register number laid out as XXXXY.
IValueT getYInRegXXXXY(IValueT RegXXXXY) {
  constexpr IValueT YMask = 0x1;
  return RegXXXXY & YMask;
}
......@@ -1010,6 +1020,60 @@ void AssemblerARM32::emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd,
emitInst(Encoding);
}
// Emits a VMOV between an ARM core register (OpRt) and one integer element of
// a Q register (OpQn). The element width is taken from OpRt's type (8, 16 or
// 32 bits); Index selects the element. IsExtract sets bit 20 of the encoding.
// InstName is used only in diagnostics. Any other element width is a fatal
// error.
void AssemblerARM32::emitInsertExtractInt(CondARM32::Cond Cond,
                                          const Operand *OpQn, uint32_t Index,
                                          const Operand *OpRt, bool IsExtract,
                                          const char *InstName) {
  const IValueT Rt = encodeGPRegister(OpRt, "Rt", InstName);
  IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", InstName));
  assert(Rt != RegARM32::Encoded_Reg_pc);
  assert(Rt != RegARM32::Encoded_Reg_sp);
  assert(CondARM32::isDefined(Cond));

  // Split Index into three pieces: one piece is OR-ed into Dn, the rest is
  // distributed over the Opcode1/Opcode2 fields together with the bits that
  // mark the element size.
  // NOTE(review): mask(Value, Shift, Bits) presumably extracts Bits bits of
  // Value starting at bit Shift - confirm against its definition.
  IValueT Opcode1 = 0;
  IValueT Opcode2 = 0;
  const uint32_t ElmtBits = typeWidthInBytes(OpRt->getType()) * CHAR_BIT;
  if (ElmtBits == 8) {
    assert(Index < 16);
    Dn |= mask(Index, 3, 1);
    Opcode1 = B1 | mask(Index, 2, 1);
    Opcode2 = mask(Index, 0, 2);
  } else if (ElmtBits == 16) {
    assert(Index < 8);
    Dn |= mask(Index, 2, 1);
    Opcode1 = mask(Index, 1, 1);
    Opcode2 = (mask(Index, 0, 1) << 1) | B0;
  } else if (ElmtBits == 32) {
    assert(Index < 4);
    Dn |= mask(Index, 1, 1);
    Opcode1 = mask(Index, 0, 1);
  } else {
    llvm::report_fatal_error(std::string(InstName) +
                             ": Unable to process type " +
                             typeIceString(OpRt->getType()));
  }

  // Assemble the instruction word one field at a time.
  IValueT Encoding = B27 | B26 | B25 | B11 | B9 | B8 | B4;
  Encoding |= encodeCondition(Cond) << kConditionShift;
  Encoding |= Opcode1 << 21;
  Encoding |= encodeBool(IsExtract) << 20;
  Encoding |= getXXXXInRegYXXXX(Dn) << kRnShift;
  Encoding |= Rt << 12;
  Encoding |= getYInRegYXXXX(Dn) << 7;
  Encoding |= Opcode2 << 5;
  emitInst(Encoding);
}
// Emits an S-register to S-register move (Sd = Sm) from already-encoded
// register numbers.
void AssemblerARM32::emitMoveSS(CondARM32::Cond Cond, IValueT Sd, IValueT Sm) {
  // VMOV (register) - ARM section A8.8.340, encoding A2:
  //   vmov<c>.f32 <Sd>, <Sm>
  //
  // cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
  //
  // The instruction has only two register operands, so the middle register
  // argument of emitVFPsss is passed as zero.
  constexpr IValueT UnusedReg = 0;
  constexpr IValueT Opcode = B23 | B21 | B20 | B6;
  emitVFPsss(Cond, Opcode, Sd, UnusedReg, Sm);
}
void AssemblerARM32::emitMulOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd,
IValueT Rn, IValueT Rm, IValueT Rs,
bool SetFlags) {
......@@ -2654,6 +2718,33 @@ void AssemblerARM32::vmovdrr(const Operand *OpDm, const Operand *OpRt,
emitInst(Encoding);
}
// Qn[Index] = Rt: inserts the core register Rt into element Index of Qn.
void AssemblerARM32::vmovqir(const Operand *OpQn, uint32_t Index,
                             const Operand *OpRt, CondARM32::Cond Cond) {
  // VMOV (ARM core register to scalar) - ARM section A8.8.341, encoding A1:
  //   vmov<c>.<size> <Dn[x]>, <Rt>
  //
  // An insert is the non-extract form of the shared insert/extract emitter.
  constexpr bool IsExtract = false;
  constexpr const char *InstName = "vmovdr";
  emitInsertExtractInt(Cond, OpQn, Index, OpRt, IsExtract, InstName);
}
void AssemblerARM32::vmovqis(const Operand *OpQd, uint32_t Index,
const Operand *OpSm, CondARM32::Cond Cond) {
constexpr const char *Vmovqis = "vmovqis";
assert(Index < 4);
IValueT Sd = mapQRegToSReg(encodeQRegister(OpQd, "Qd", Vmovqis)) + Index;
IValueT Sm = encodeSRegister(OpSm, "Sm", Vmovqis);
emitMoveSS(Cond, Sd, Sm);
}
// Rt = Qn[Index]: extracts element Index of Qn into the core register Rt.
void AssemblerARM32::vmovrqi(const Operand *OpRt, const Operand *OpQn,
                             uint32_t Index, CondARM32::Cond Cond) {
  // VMOV (scalar to ARM core register) - ARM section A8.8.342, encoding A1:
  //   vmov<c>.<dt> <Rt>, <Dn[x]>
  constexpr const char *InstName = "vmovrd";
  constexpr bool Extract = true;
  emitInsertExtractInt(Cond, OpQn, Index, OpRt, Extract, InstName);
}
void AssemblerARM32::vmovrrd(const Operand *OpRt, const Operand *OpRt2,
const Operand *OpDm, CondARM32::Cond Cond) {
// VMOV (between two ARM core registers and a doubleword extension register).
......@@ -2716,16 +2807,20 @@ void AssemblerARM32::vmovs(const Operand *OpSd,
// Sd = Sm: moves one single-precision register to another.
void AssemblerARM32::vmovss(const Operand *OpSd, const Variable *OpSm,
                            CondARM32::Cond Cond) {
  // VMOV (register) - ARM section A8.8.340, encoding A2:
  //   vmov<c>.f32 <Sd>, <Sm>
  //
  // cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
  constexpr const char *Vmovss = "Vmovss";
  const IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovss);
  const IValueT Sm = encodeSRegister(OpSm, "Sm", Vmovss);
  // The bit pattern is produced by emitMoveSS(); it must be the only emission
  // here so that exactly one instruction is generated.
  emitMoveSS(Cond, Sd, Sm);
}
void AssemblerARM32::vmovsqi(const Operand *OpSd, const Operand *OpQm,
uint32_t Index, CondARM32::Cond Cond) {
constexpr const char *Vmovsqi = "vmovsqi";
const IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovsqi);
assert(Index < 4);
const IValueT Sm =
mapQRegToSReg(encodeQRegister(OpQm, "Qm", Vmovsqi)) + Index;
emitMoveSS(Cond, Sd, Sm);
}
void AssemblerARM32::vmovsr(const Operand *OpSn, const Operand *OpRt,
......
......@@ -417,24 +417,48 @@ public:
vld1qr(ElmtSize, OpQd, OpRn, TInfo);
}
// Dn = FpImm
void vmovd(const Operand *OpDn, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond);
// Dd = Dm
void vmovdd(const Operand *OpDd, const Variable *OpDm, CondARM32::Cond Cond);
// Dm = Rt:Rt2
void vmovdrr(const Operand *OpDm, const Operand *OpRt, const Operand *OpRt2,
CondARM32::Cond Cond);
// Qd[Index] = Rt
void vmovqir(const Operand *OpQd, uint32_t Index, const Operand *OpRt,
CondARM32::Cond Cond);
// Qd[Index] = Sm
void vmovqis(const Operand *OpQd, uint32_t Index, const Operand *OpSm,
             CondARM32::Cond Cond);
// Rt = Qn[Index]
void vmovrqi(const Operand *OpRt, const Operand *OpQn, uint32_t Index,
             CondARM32::Cond Cond);
// Rt:Rt2 = Dm
void vmovrrd(const Operand *OpRt, const Operand *OpRt2, const Operand *OpDm,
CondARM32::Cond Cond);
// Rt = Sn
void vmovrs(const Operand *OpRt, const Operand *OpSn, CondARM32::Cond Cond);
// Sn = FpImm
void vmovs(const Operand *OpSn, const OperandARM32FlexFpImm *OpFpImm,
CondARM32::Cond Cond);
// Sd = Sm
void vmovss(const Operand *OpSd, const Variable *OpSm, CondARM32::Cond Cond);
// Sd = Qm[Index]
void vmovsqi(const Operand *OpSd, const Operand *OpQm, uint32_t Index,
CondARM32::Cond Cond);
// Sn = Rt
void vmovsr(const Operand *OpSn, const Operand *OpRt, CondARM32::Cond Cond);
void vmlad(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
......@@ -641,6 +665,17 @@ private:
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
IValueT Rm);
// cccc1110iiiennnntttt1011Njj10000 where cccc=Cond, tttt=Rt, Nnnnn=Dn (the
// D register of Qn that holds the indexed element),
// iii=Opcode1, jj=Opcode2, Opcode1Opcode2 encodes Index and the
// corresponding element size of the vector element, and e=IsExtract.
void emitInsertExtractInt(CondARM32::Cond Cond, const Operand *OpQn,
uint32_t Index, const Operand *OpRt, bool IsExtract,
const char *InstName);
// cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
// Assigns Sd the value of Sm.
void emitMoveSS(CondARM32::Cond Cond, IValueT Sd, IValueT Sm);
// Pattern ccccxxxxxxxfnnnnddddssss1001mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
// mmmm=Rm, ssss=Rs, f=SetFlags and xxxxxxx=Opcode.
void emitMulOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
......
......@@ -1067,6 +1067,8 @@ InstARM32Mov::InstARM32Mov(Cfg *Func, Variable *Dest, Operand *Src,
}
}
namespace {
// These next two functions find the D register that maps to the half of the Q
// register that this instruction is accessing.
Register getDRegister(const Variable *Src, uint32_t Index) {
......@@ -1124,6 +1126,8 @@ Register getSRegister(const Variable *Src, uint32_t Index) {
return static_cast<Register>(RegARM32::RegTable[SrcReg].Aliases[Index + 3]);
}
} // end of anonymous namespace
void InstARM32Extract::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
const Type DestTy = getDest()->getType();
......@@ -1162,6 +1166,23 @@ void InstARM32Extract::emit(const Cfg *Func) const {
}
}
// Emits the extract through the integrated assembler: integer elements go to a
// core register via vmovrqi, floating-point elements go to an S register via
// vmovsqi.
void InstARM32Extract::emitIAS(const Cfg *Func) const {
  const Operand *Dest = getDest();
  const Operand *Src = getSrc(0);
  const Type DestTy = Dest->getType();
  assert(isVectorType(Src->getType()));
  assert(DestTy == typeElementType(Src->getType()));
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  if (isIntegerType(DestTy)) {
    Asm->vmovrqi(Dest, Src, Index, getPredicate());
  } else {
    assert(isFloatingType(DestTy));
    Asm->vmovsqi(Dest, Src, Index, getPredicate());
  }
  // Both forms are expected to be fully handled by the integrated assembler.
  assert(!Asm->needsTextFixup());
}
void InstARM32Insert::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
const Variable *Dest = getDest();
......@@ -1193,6 +1214,24 @@ void InstARM32Insert::emit(const Cfg *Func) const {
}
}
// Emits the insert through the integrated assembler: integer sources come from
// a core register via vmovqir, floating-point sources come from an S register
// via vmovqis.
void InstARM32Insert::emitIAS(const Cfg *Func) const {
  const Variable *Dest = getDest();
  const Operand *Src = getSrc(0);
  const Type SrcTy = Src->getType();
  assert(isVectorType(Dest->getType()));
  assert(typeElementType(Dest->getType()) == SrcTy);
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  if (isIntegerType(SrcTy)) {
    Asm->vmovqir(Dest, Index, Src, getPredicate());
    assert(!Asm->needsTextFixup());
    return;
  }
  assert(isFloatingType(SrcTy));
  Asm->vmovqis(Dest, Index, Src, getPredicate());
  assert(!Asm->needsTextFixup());
}
template <InstARM32::InstKindARM32 K>
void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
......
......@@ -1368,6 +1368,7 @@ public:
InstARM32Extract(Func, Dest, Src0, Index, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Extract); }
private:
......@@ -1396,6 +1397,7 @@ public:
InstARM32Insert(Func, Dest, Src0, Index, Predicate);
}
void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override;
static bool classof(const Inst *Inst) { return isClassof(Inst, Insert); }
private:
......
......@@ -27,9 +27,9 @@ define internal i32 @extract1_v4i32(<4 x i32> %src) {
%1 = extractelement <4 x i32> %src, i32 1
; ASM: vmov.32 r0, d0[1]
; ASM: vmov.32 r0, d0[1]
; DIS: 10: ee300b10
; IASM-NOT: vmov.32 r0, d0[1]
ret i32 %1
}
......@@ -40,8 +40,9 @@ define internal i32 @extract2_v4i32(<4 x i32> %src) {
%1 = extractelement <4 x i32> %src, i32 2
; ASM: vmov.32 r0, d1[0]
; ASM: vmov.32 r0, d1[0]
; DIS: 40: ee110b10
; IASM-NOT: vmov.32 r0, d1[0]
ret i32 %1
}
......@@ -53,8 +54,10 @@ define internal i32 @extract3_v8i16(<8 x i16> %src) {
%1 = extractelement <8 x i16> %src, i32 3
; ASM: vmov.s16 r0, d0[3]
; ASM: vmov.s16 r0, d0[3]
; DIS: 70: ee300b70
; IASM-NOT: vmov.s16 r0, d0[3]
%2 = sext i16 %1 to i32
ret i32 %2
}
......@@ -66,8 +69,9 @@ define internal i32 @extract4_v8i16(<8 x i16> %src) {
%1 = extractelement <8 x i16> %src, i32 4
; ASM: vmov.s16 r0, d1[0]
; ASM: vmov.s16 r0, d1[0]
; DIS: a0: ee110b30
; IASM-NOT: vmov.s16 r0, d1[0]
%2 = sext i16 %1 to i32
ret i32 %2
......@@ -80,8 +84,9 @@ define internal i32 @extract7_v4i8(<16 x i8> %src) {
%1 = extractelement <16 x i8> %src, i32 7
; ASM: vmov.s8 r0, d0[7]
; ASM: vmov.s8 r0, d0[7]
; DIS: d0: ee700b70
; IASM-NOT: vmov.s8 r0, d0[7]
%2 = sext i8 %1 to i32
ret i32 %2
......@@ -94,8 +99,9 @@ define internal i32 @extract8_v16i8(<16 x i8> %src) {
%1 = extractelement <16 x i8> %src, i32 8
; ASM: vmov.s8 r0, d1[0]
; ASM: vmov.s8 r0, d1[0]
; DIS: 100: ee510b10
; IASM-NOT: vmov.s8 r0, d1[0]
%2 = sext i8 %1 to i32
ret i32 %2
......@@ -108,8 +114,9 @@ define internal float @extract1_v4float(<4 x float> %src) {
%1 = extractelement <4 x float> %src, i32 1
; ASM: vmov.f32 s0, s1
; ASM: vmov.f32 s0, s1
; DIS: 130: eeb00a60
; IASM-NOT: vmov.f32 s0, s1
ret float %1
}
......@@ -121,8 +128,9 @@ define internal float @extract2_v4float(<4 x float> %src) {
%1 = extractelement <4 x float> %src, i32 2
; ASM: vmov.f32 s0, s2
; ASM: vmov.f32 s0, s2
; DIS: 160: eeb00a41
; IASM-NOT: vmov.f32 s0, s2
ret float %1
}
......@@ -134,8 +142,9 @@ define internal <4 x i32> @insert1_v4i32(<4 x i32> %src, i32 %s) {
%1 = insertelement <4 x i32> %src, i32 %s, i32 1
; ASM: vmov.32 d0[1], r0
; ASM: vmov.32 d0[1], r0
; DIS: 198: ee200b10
; IASM-NOT: vmov.32 d0[1], r0
ret <4 x i32> %1
}
......@@ -147,8 +156,9 @@ define internal <4 x i32> @insert2_v4i32(<4 x i32> %src, i32 %s) {
%1 = insertelement <4 x i32> %src, i32 %s, i32 2
; ASM: vmov.32 d1[0], r0
; ASM: vmov.32 d1[0], r0
; DIS: 1c8: ee010b10
; IASM-NOT: vmov.32 d1[0], r0
ret <4 x i32> %1
}
......@@ -161,8 +171,10 @@ define internal <8 x i16> @insert3_v8i16(<8 x i16> %src, i32 %s) {
%s2 = trunc i32 %s to i16
%1 = insertelement <8 x i16> %src, i16 %s2, i32 3
; ASM: vmov.16 d0[3], r0
; ASM: vmov.16 d0[3], r0
; DIS: 200: ee200b70
; IASM-NOT: vmov.16 d0[3], r0
ret <8 x i16> %1
}
......@@ -174,8 +186,10 @@ define internal <8 x i16> @insert4_v8i16(<8 x i16> %src, i32 %s) {
%s2 = trunc i32 %s to i16
%1 = insertelement <8 x i16> %src, i16 %s2, i32 4
; ASM: vmov.16 d1[0], r0
; ASM: vmov.16 d1[0], r0
; DIS: 240: ee010b30
; IASM-NOT: vmov.16 d1[0], r0
ret <8 x i16> %1
}
......@@ -187,8 +201,9 @@ define internal <16 x i8> @insert7_v4i8(<16 x i8> %src, i32 %s) {
%s2 = trunc i32 %s to i8
%1 = insertelement <16 x i8> %src, i8 %s2, i32 7
; ASM: vmov.8 d0[7], r0
; ASM: vmov.8 d0[7], r0
; DIS: 280: ee600b70
; IASM-NOT: vmov.8 d0[7], r0
ret <16 x i8> %1
}
......@@ -201,8 +216,9 @@ define internal <16 x i8> @insert8_v16i8(<16 x i8> %src, i32 %s) {
%s2 = trunc i32 %s to i8
%1 = insertelement <16 x i8> %src, i8 %s2, i32 8
; ASM: vmov.8 d1[0], r0
; ASM: vmov.8 d1[0], r0
; DIS: 2c0: ee410b10
; IASM-NOT: vmov.8 d1[0], r0
ret <16 x i8> %1
}
......@@ -214,8 +230,9 @@ define internal <4 x float> @insert1_v4float(<4 x float> %src, float %s) {
%1 = insertelement <4 x float> %src, float %s, i32 1
; ASM: vmov.f32 s1, s4
; ASM: vmov.f32 s1, s4
; DIS: 2f8: eef00a42
; IASM-NOT: vmov.f32 s1, s4
ret <4 x float> %1
}
......@@ -227,8 +244,9 @@ define internal <4 x float> @insert2_v4float(<4 x float> %src, float %s) {
%1 = insertelement <4 x float> %src, float %s, i32 2
; ASM: vmov.f32 s2, s4
; ASM: vmov.f32 s2, s4
; DIS: 328: eeb01a42
; IASM-NOT: vmov.f32 s2, s4
ret <4 x float> %1
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment