Commit 4acf11ac by Karl Schimpf

Add VADD instruction to the ARM integrated assembler.

parent 4f69e018
......@@ -880,7 +880,8 @@ void Assembler::vstmd(BlockAddressMode am, Register base,
EmitMultiVDMemOp(cond, am, false, base, first, count);
}
#if 0
// Moved to ARM32::AssemblerARM32::emitVFPsss
void Assembler::EmitVFPsss(Condition cond, int32_t opcode,
SRegister sd, SRegister sn, SRegister sm) {
ASSERT(TargetCPUFeatures::vfp_supported());
......@@ -899,7 +900,7 @@ void Assembler::EmitVFPsss(Condition cond, int32_t opcode,
Emit(encoding);
}
// Moved to ARM32::AssemblerARM32::emitVFPddd
void Assembler::EmitVFPddd(Condition cond, int32_t opcode,
DRegister dd, DRegister dn, DRegister dm) {
ASSERT(TargetCPUFeatures::vfp_supported());
......@@ -917,7 +918,7 @@ void Assembler::EmitVFPddd(Condition cond, int32_t opcode,
(static_cast<int32_t>(dm) & 0xf);
Emit(encoding);
}
#endif
void Assembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
......@@ -964,18 +965,19 @@ bool Assembler::vmovd(DRegister dd, double d_imm, Condition cond) {
return false;
}
#if 0
// Moved to ARM32::AssemblerARM32::vadds()
// VADD on S registers: forwards to EmitVFPsss with opcode bits B21 | B20.
// Compiled out by the enclosing #if 0 now that the ARM32 assembler owns it.
void Assembler::vadds(SRegister sd, SRegister sn, SRegister sm,
Condition cond) {
EmitVFPsss(cond, B21 | B20, sd, sn, sm);
}
// Moved to ARM32::AssemblerARM32::vaddd()
// VADD on D registers: forwards to EmitVFPddd with opcode bits B21 | B20.
// Compiled out by the enclosing #if 0 now that the ARM32 assembler owns it.
void Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
Condition cond) {
EmitVFPddd(cond, B21 | B20, dd, dn, dm);
}
#endif
void Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
Condition cond) {
......
......@@ -647,8 +647,12 @@ class Assembler : public ValueObject {
void vstmd(BlockAddressMode am, Register base,
DRegister first, intptr_t count, Condition cond = AL);
#if 0
// Moved to ARM32::AssemblerARM32::vadds()
void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vaddd()
void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
#endif
void vaddqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
void vaddqs(QRegister qd, QRegister qn, QRegister qm);
void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
......@@ -1228,7 +1232,7 @@ class Assembler : public ValueObject {
Register rm,
Register rs);
// Moved to ARM32::AssemblerAR32::emitDivOp();
// Moved to ARM32::AssemblerARM32::emitDivOp();
void EmitDivOp(Condition cond,
int32_t opcode,
Register rd,
......@@ -1250,17 +1254,21 @@ class Assembler : public ValueObject {
DRegister start,
int32_t count);
#if 0
// Moved to ARM32::AssemblerARM32::emitVFPsss
void EmitVFPsss(Condition cond,
int32_t opcode,
SRegister sd,
SRegister sn,
SRegister sm);
// Moved to ARM32::AssemblerARM32::emitVFPddd
void EmitVFPddd(Condition cond,
int32_t opcode,
DRegister dd,
DRegister dn,
DRegister dm);
#endif
void EmitVFPsd(Condition cond,
int32_t opcode,
......
......@@ -318,6 +318,12 @@ public:
// Implements uxtb/uxth depending on type of OpSrc0.
void uxt(const Operand *OpRd, const Operand *OpSrc0, CondARM32::Cond Cond);
// Emits vadd for D-register operands (OpDd = OpDn + OpDm). Used by
// InstARM32Vadd::emitIAS when the destination type is IceType_f64.
void vaddd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond);
// Emits vadd for S-register operands (OpSd = OpSn + OpSm). Used by
// InstARM32Vadd::emitIAS when the destination type is IceType_f32.
void vadds(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
void vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond);
......@@ -383,16 +389,14 @@ private:
IValueT OpRn, const Operand *OpSrc1, bool SetFlags,
EmitChecks RuleChecks, const char *InstName);
void emitType05(CondARM32::Cond Cond, int32_t Offset, bool Link,
const char *InstName);
void emitType05(CondARM32::Cond Cond, int32_t Offset, bool Link);
// Emit ccccoooaabalnnnnttttaaaaaaaaaaaa where cccc=Cond,
// ooo=InstType, l=isLoad, b=isByte, and
// aaa0a0aaaa0000aaaaaaaaaaaa=Address. Note that Address is assumed to be
// defined by decodeAddress() in IceAssemblerARM32.cpp.
void emitMemOp(CondARM32::Cond Cond, IValueT InstType, bool IsLoad,
bool IsByte, IValueT Rt, IValueT Address,
const char *InstName);
bool IsByte, IValueT Rt, IValueT Address);
// Emit ccccxxxxxxxxxxxxddddxxxxxxxxmmmm where cccc=Cond,
// xxxxxxxxxxxx0000xxxxxxxx0000=Opcode, dddd=Rd, and mmmm=Rm.
......@@ -419,24 +423,22 @@ private:
// aaaa<<21=AddressMode, l=IsLoad, nnnn=BaseReg, and
// rrrrrrrrrrrrrrrr is bitset of Registers.
void emitMultiMemOp(CondARM32::Cond Cond, BlockAddressMode AddressMode,
bool IsLoad, IValueT BaseReg, IValueT Registers,
const char *InstName);
bool IsLoad, IValueT BaseReg, IValueT Registers);
// Pattern ccccxxxxxDxxxxxxddddxxxxiiiiiiii where cccc=Cond, ddddD=BaseReg,
// iiiiiiii=NumConsecRegs, and xxxxx0xxxxxx0000xxxx00000000=Opcode.
void emitVStackOp(CondARM32::Cond Cond, IValueT Opcode,
const Variable *OpBaseReg, SizeT NumConsecRegs,
const char *InstName);
const Variable *OpBaseReg, SizeT NumConsecRegs);
// Pattern cccc011100x1dddd1111mmmm0001nnnn where cccc=Cond,
// x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm.
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
IValueT Rm, const char *InstName);
IValueT Rm);
// Pattern ccccxxxxxxxfnnnnddddssss1001mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
// mmmm=Rm, ssss=Rs, f=SetFlags and xxxxxxx=Opcode.
void emitMulOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
IValueT Rm, IValueT Rs, bool SetFlags, const char *InstName);
IValueT Rm, IValueT Rs, bool SetFlags);
// Pattern cccc0001101s0000ddddxxxxxtt0mmmm where cccc=Cond, s=SetFlags,
// dddd=Rd, mmmm=Rm, tt=Shift, and xxxxx is defined by OpSrc1. OpSrc1 defines
......@@ -471,6 +473,14 @@ private:
// iiiiiiiiiiiiiiii=Imm16.
void emitMovwt(CondARM32::Cond Cond, bool IsMovw, const Operand *OpRd,
const Operand *OpSrc, const char *MovName);
// Emit VFP instruction with 3 D registers.
void emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, IValueT Dn,
IValueT Dm);
// Emit VFP instruction with 3 S registers.
void emitVFPsss(CondARM32::Cond Cond, IValueT Opcode, IValueT Sd, IValueT Sn,
IValueT Sm);
};
} // end of namespace ARM32
......
......@@ -33,6 +33,8 @@ public:
ICEINSTARM32COND_TABLE
#undef X
};
// Returns true iff C is any condition other than kNone.
static bool isDefined(Cond C) { return C != kNone; }
};
} // end of namespace Ice
......
......@@ -226,6 +226,11 @@ void InstARM32FourAddrGPR<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
// Generic integrated-assembler emission for three-address FP instructions:
// no binary encoding is produced here; emission is delegated to
// emitUsingTextFixup().
template <InstARM32::InstKindARM32 K>
void InstARM32ThreeAddrFP<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
template <> void InstARM32Mla::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
......@@ -592,6 +597,25 @@ template <> void InstARM32Udiv::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
// Integrated-assembler emission for vadd. f32 and f64 destinations are handed
// to the assembler's vadds/vaddd; every other destination type requests a text
// fixup. Whenever the assembler reports that a text fixup is needed (requested
// here or by the assembler itself), the instruction is emitted textually.
template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const auto DestTy = getDest()->getType();
  if (DestTy == IceType_f32) {
    Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
  } else if (DestTy == IceType_f64) {
    Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
  } else {
    // TODO(kschimpf) Figure out if more cases are needed.
    Asm->setNeedsTextFixup();
  }
  if (Asm->needsTextFixup())
    emitUsingTextFixup(Func);
}
InstARM32Call::InstARM32Call(Cfg *Func, Variable *Dest, Operand *CallTarget)
: InstARM32(Func, InstARM32::Call, 1, Dest) {
HasSideEffects = true;
......@@ -1404,7 +1428,7 @@ void InstARM32Pop::emitIAS(const Cfg *Func) const {
const Variable *LastDest = nullptr;
for (const Variable *Var : Dests) {
assert(Var->hasReg() && "pop only applies to registers");
int32_t Reg = RegARM32::getEncodedGPR(Var->getRegNum());
int32_t Reg = RegARM32::getEncodedGPReg(Var->getRegNum());
LastDest = Var;
GPRegisters |= (1 << Reg);
++IntegerCount;
......@@ -1536,7 +1560,7 @@ void InstARM32Push::emitIAS(const Cfg *Func) const {
const Variable *LastSrc = nullptr;
for (SizeT Index = 0; Index < getSrcSize(); ++Index) {
const auto *Var = llvm::cast<Variable>(getSrc(Index));
int32_t Reg = RegARM32::getEncodedGPR(Var->getRegNum());
int32_t Reg = RegARM32::getEncodedGPReg(Var->getRegNum());
assert(Reg != RegARM32::Encoded_Not_GPR);
LastSrc = Var;
GPRegisters |= (1 << Reg);
......
......@@ -732,6 +732,7 @@ public:
return;
emitThreeAddrFP(Opcode, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
......
......@@ -146,56 +146,99 @@ public:
#undef X
};
static inline GPRRegister getEncodedGPR(int32_t RegNum) {
assert(Reg_GPR_First <= RegNum);
assert(RegNum <= Reg_GPR_Last);
// Asserts that RegNum lies in the range [0, Reg_NUM).
static inline void assertRegisterDefined(int32_t RegNum) {
// Keeps RegNum referenced even if the asserts below compile to nothing.
(void)RegNum;
assert(RegNum >= 0);
assert(RegNum < Reg_NUM);
}
// Returns the IsGPR flag of RegNum's entry in Table. RegNum must be a defined
// register number (checked by assertRegisterDefined).
static inline bool isGPRegister(int32_t RegNum) {
assertRegisterDefined(RegNum);
return Table[RegNum].IsGPR;
}
// Counts the registers whose isGPR column is set. The X-macro expands each row
// of REGARM32_TABLE to "+(isGPR)", so the result is the constant expression
// 0 + isGPR_0 + isGPR_1 + ...
static constexpr inline SizeT getNumGPRegs() {
return 0
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
+(isGPR)
REGARM32_TABLE
#undef X
;
}
// Returns the Encoding field of RegNum's Table entry as a GPRRegister.
// Asserts that RegNum is flagged as a GPR.
static inline GPRRegister getEncodedGPReg(int32_t RegNum) {
assert(isGPRegister(RegNum));
return GPRRegister(Table[RegNum].Encoding);
}
static inline GPRRegister getI64PairFirstGPRNum(int32_t RegNum) {
assert(Reg_I64PAIR_First <= RegNum);
assert(RegNum <= Reg_I64PAIR_Last);
assert(isI64RegisterPair(RegNum));
return GPRRegister(Table[RegNum].Encoding);
}
static inline GPRRegister getI64PairSecondGPRNum(int32_t RegNum) {
assert(Reg_I64PAIR_First <= RegNum);
assert(RegNum <= Reg_I64PAIR_Last);
assert(isI64RegisterPair(RegNum));
return GPRRegister(Table[RegNum].Encoding + 1);
}
// Returns the IsI64Pair flag of RegNum's entry in Table. RegNum must be a
// defined register number (checked by assertRegisterDefined).
static inline bool isI64RegisterPair(int32_t RegNum) {
assertRegisterDefined(RegNum);
return Table[RegNum].IsI64Pair;
}
// Returns the IsFP32 flag of RegNum's entry in Table; getEncodedSReg asserts
// this before reading the encoding. RegNum must be a defined register number.
static inline bool isEncodedSReg(int32_t RegNum) {
assertRegisterDefined(RegNum);
return Table[RegNum].IsFP32;
}
static inline SizeT getNumSRegs() {
return Reg_SREG_Last + 1 - Reg_SREG_First;
static constexpr inline SizeT getNumSRegs() {
return 0
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
+(isFP32)
REGARM32_TABLE
#undef X
;
}
static inline SRegister getEncodedSReg(int32_t RegNum) {
assert(Reg_SREG_First <= RegNum);
assert(RegNum <= Reg_SREG_Last);
assert(isEncodedSReg(RegNum));
return SRegister(Table[RegNum].Encoding);
}
// Returns the IsFP64 flag of RegNum's entry in Table; getEncodedDReg asserts
// this before reading the encoding. RegNum must be a defined register number.
static inline bool isEncodedDReg(int32_t RegNum) {
assertRegisterDefined(RegNum);
return Table[RegNum].IsFP64;
}
// Counts the registers whose isFP64 column is set. The X-macro expands each
// row of REGARM32_TABLE to "+(isFP64)", so the result is the constant
// expression 0 + isFP64_0 + isFP64_1 + ...
static constexpr inline SizeT getNumDRegs() {
return 0
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
+(isFP64)
REGARM32_TABLE
#undef X
;
}
static inline DRegister getEncodedDReg(int32_t RegNum) {
assert(Reg_DREG_First <= RegNum);
assert(RegNum <= Reg_DREG_Last);
assert(isEncodedDReg(RegNum));
return DRegister(Table[RegNum].Encoding);
}
// Returns the IsVec128 flag of RegNum's entry in Table; getEncodedQReg asserts
// this before reading the encoding. RegNum must be a defined register number.
static inline bool isEncodedQReg(int32_t RegNum) {
assertRegisterDefined(RegNum);
return Table[RegNum].IsVec128;
}
static inline QRegister getEncodedQReg(int32_t RegNum) {
assert(Reg_QREG_First <= RegNum);
assert(RegNum <= Reg_QREG_Last);
assert(isEncodedQReg(RegNum));
return QRegister(Table[RegNum].Encoding);
}
static inline IceString getRegName(SizeT RegNum) {
assert(RegNum < Reg_NUM);
static inline IceString getRegName(int32_t RegNum) {
assertRegisterDefined(RegNum);
return Table[RegNum].Name;
}
};
......
; Show that we know how to translate vadd.
; NOTE: We use -O2 to get rid of memory stores.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
define internal float @testVadd(float %v1, float %v2) {
; ASM-LABEL: testVadd:
; DIS-LABEL: 00000000 <testVadd>:
; IASM-LABEL: testVadd:
entry:
; ASM-NEXT: .LtestVadd$entry:
; IASM-NEXT: .LtestVadd$entry:
%res = fadd float %v1, %v2
; ASM-NEXT: vadd.f32 s0, s0, s1
; DIS-NEXT: 0: ee300a20
; IASM-NEXT: .byte 0x20
; IASM-NEXT: .byte 0xa
; IASM-NEXT: .byte 0x30
; IASM-NEXT: .byte 0xee
ret float %res
}
......@@ -33,65 +33,68 @@ entry:
; ASM-NEXT: vpush {s28, s29, s30, s31}
; DIS-NEXT: 0: ed2dea04
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xea
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xed
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xea
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xed
; ASM-NEXT: push {lr}
; DIS-NEXT: 4: e52de004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: sub sp, sp, #12
; DIS-NEXT: 8: e24dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: vmov.f64 d15, d0
; DIS-NEXT: c: eeb0fb40
; IASM-NEXT: vmov.f64 d15, d0
; IASM-NEXT: vmov.f64 d15, d0
; ASM-NEXT: vmov.f64 d14, d1
; DIS-NEXT: 10: eeb0eb41
; IASM-NEXT: vmov.f64 d14, d1
; IASM-NEXT: vmov.f64 d14, d1
call void @foo()
; ASM-NEXT: bl foo
; DIS-NEXT: 14: ebfffffe
; IASM-NEXT: bl foo @ .word ebfffffe
; IASM-NEXT: bl foo @ .word ebfffffe
%res = fadd double %v1, %v2
; ASM-NEXT: vadd.f64 d15, d15, d14
; DIS-NEXT: 18: ee3ffb0e
; IASM-NEXT: vadd.f64 d15, d15, d14
; IASM-NEXT: .byte 0xe
; IASM-NEXT: .byte 0xfb
; IASM-NEXT: .byte 0x3f
; IASM-NEXT: .byte 0xee
; ASM-NEXT: vmov.f64 d0, d15
; DIS-NEXT: 1c: eeb00b4f
; IASM-NEXT: vmov.f64 d0, d15
; IASM-NEXT: vmov.f64 d0, d15
ret double %res
; ASM-NEXT: add sp, sp, #12
; DIS-NEXT: 20: e28dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe2
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: pop {lr}
; ASM-NEXT: # lr = def.pseudo
; DIS-NEXT: 24: e49de004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4
; ASM-NEXT: vpop {s28, s29, s30, s31}
; ASM-NEXT: # s28 = def.pseudo
......@@ -99,17 +102,17 @@ entry:
; ASM-NEXT: # s30 = def.pseudo
; ASM-NEXT: # s31 = def.pseudo
; DIS-NEXT: 28: ecbdea04
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xea
; IASM-NEXT: .byte 0xbd
; IASM-NEXT: .byte 0xec
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xea
; IASM-NEXT: .byte 0xbd
; IASM-NEXT: .byte 0xec
; ASM-NEXT: bx lr
; DIS-NEXT: 2c: e12fff1e
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment