Commit bd8e28e3 by Karl Schimpf

Add VMLA (floating point) to the integrated ARM assembler.

Adds the scalar floating point versions of instruction VMLA to the integrated ARM assembler. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4334 R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1634913005 .
parent d1bd1d33
......@@ -977,42 +977,42 @@ void Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
EmitVFPddd(cond, B21 | B20, dd, dn, dm);
}
// Moved to Arm32::AssemblerARM32::vmuls()
// Moved to Arm32::AssemblerARM32::vsubs()
// Emits a three-S-register VFP instruction through EmitVFPsss() using the
// opcode bits B21, B20 and B6.
void Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
                      Condition cond) {
  const int32_t kVsubOpcode = B21 | B20 | B6;
  EmitVFPsss(cond, kVsubOpcode, sd, sn, sm);
}
// Moved to Arm32::AssemblerARM32::vmuld()
// Moved to Arm32::AssemblerARM32::vsubd()
// Emits a three-D-register VFP instruction through EmitVFPddd() using the
// opcode bits B21, B20 and B6.
void Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
                      Condition cond) {
  const int32_t kVsubOpcode = B21 | B20 | B6;
  EmitVFPddd(cond, kVsubOpcode, dd, dn, dm);
}
#endif
#if 0
// Moved to Arm32::AssemblerARM32::vmuls()
// Emits a three-S-register VFP instruction through EmitVFPsss() using the
// single opcode bit B21.
void Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
                      Condition cond) {
  const int32_t kVmulOpcode = B21;
  EmitVFPsss(cond, kVmulOpcode, sd, sn, sm);
}
// Moved to Arm32::AssemblerARM32::vmuld()
// Emits a three-D-register VFP instruction through EmitVFPddd() using the
// single opcode bit B21.
void Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
                      Condition cond) {
  const int32_t kVmulOpcode = B21;
  EmitVFPddd(cond, kVmulOpcode, dd, dn, dm);
}
#endif
// Moved to Arm32::AssemblerARM32::vmlas()
// Emits a three-S-register VFP instruction through EmitVFPsss() with no
// opcode bits set (the opcode argument is 0).
void Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
                      Condition cond) {
  const int32_t kVmlaOpcode = 0;
  EmitVFPsss(cond, kVmlaOpcode, sd, sn, sm);
}
// Moved to Arm32::AssemblerARM32::vmlad()
// Emits a three-D-register VFP instruction through EmitVFPddd() with no
// opcode bits set (the opcode argument is 0).
void Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
                      Condition cond) {
  const int32_t kVmlaOpcode = 0;
  EmitVFPddd(cond, kVmlaOpcode, dd, dn, dm);
}
#endif
void Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
......
......@@ -686,8 +686,12 @@ class Assembler : public ValueObject {
void vmulqs(QRegister qd, QRegister qn, QRegister qm);
void vshlqi(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
void vshlqu(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
#if 0
// Moved to Arm32::AssemblerARM32::vmlas()
// S-register form of vmla: takes destination sd and sources sn, sm. The
// condition is optional and defaults to AL.
void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlad()
// D-register form of vmla: takes destination dd and sources dn, dm. The
// condition is optional and defaults to AL.
void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
#endif
void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
#if 0
......
......@@ -1045,6 +1045,15 @@ void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
emitInst(Encoding);
}
// Emits a VFP instruction whose three D-register operands are supplied as
// Operands. Each operand is converted with encodeDRegister(), which is given
// the operand's role label ("Dd", "Dn", "Dm") and InstName, and the encoded
// values are then forwarded to the IValueT overload of emitVFPddd().
void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
                                const Operand *OpDd, const Operand *OpDn,
                                const Operand *OpDm, const char *InstName) {
  // The encodings are separate statements so they always run in Dd, Dn, Dm
  // order; they are const to match the S-register overload of emitVFPsss().
  const IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
  const IValueT Dn = encodeDRegister(OpDn, "Dn", InstName);
  const IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
  emitVFPddd(Cond, Opcode, Dd, Dn, Dm);
}
void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
IValueT Sd, IValueT Sn, IValueT Sm) {
assert(Sd < RegARM32::getNumSRegs());
......@@ -1060,6 +1069,15 @@ void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
emitInst(Encoding);
}
// Emits a VFP instruction whose three S-register operands are supplied as
// Operands. Each operand is converted with encodeSRegister(), which is given
// the operand's role label ("Sd", "Sn", "Sm") and InstName, and the encoded
// values are then forwarded to the IValueT overload of emitVFPsss().
void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
                                const Operand *OpSd, const Operand *OpSn,
                                const Operand *OpSm, const char *InstName) {
  // Kept as separate statements so the operands are always encoded in
  // Sd, Sn, Sm order.
  const IValueT EncodedSd = encodeSRegister(OpSd, "Sd", InstName);
  const IValueT EncodedSn = encodeSRegister(OpSn, "Sn", InstName);
  const IValueT EncodedSm = encodeSRegister(OpSm, "Sm", InstName);
  emitVFPsss(Cond, Opcode, EncodedSd, EncodedSn, EncodedSm);
}
void AssemblerARM32::adc(const Operand *OpRd, const Operand *OpRn,
const Operand *OpSrc1, bool SetFlags,
CondARM32::Cond Cond) {
......@@ -2075,11 +2093,8 @@ void AssemblerARM32::vadds(const Operand *OpSd, const Operand *OpSn,
// cccc11100D11nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
// and mmmmM=Rm.
constexpr const char *Vadds = "vadds";
IValueT Sd = encodeSRegister(OpSd, "Sd", Vadds);
IValueT Sn = encodeSRegister(OpSn, "Sn", Vadds);
IValueT Sm = encodeSRegister(OpSm, "Sm", Vadds);
constexpr IValueT VaddsOpcode = B21 | B20;
emitVFPsss(Cond, VaddsOpcode, Sd, Sn, Sm);
emitVFPsss(Cond, VaddsOpcode, OpSd, OpSn, OpSm, Vadds);
}
void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn,
......@@ -2090,11 +2105,8 @@ void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn,
// cccc11100D11nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
// and Mmmmm=Rm.
constexpr const char *Vaddd = "vaddd";
IValueT Dd = encodeDRegister(OpDd, "Dd", Vaddd);
IValueT Dn = encodeDRegister(OpDn, "Dn", Vaddd);
IValueT Dm = encodeDRegister(OpDm, "Dm", Vaddd);
constexpr IValueT VadddOpcode = B21 | B20;
emitVFPddd(Cond, VadddOpcode, Dd, Dn, Dm);
emitVFPddd(Cond, VadddOpcode, OpDd, OpDn, OpDm, Vaddd);
}
void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm,
......@@ -2305,11 +2317,8 @@ void AssemblerARM32::vdivs(const Operand *OpSd, const Operand *OpSn,
// cccc11101D00nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
// and mmmmM=Rm.
constexpr const char *Vdivs = "vdivs";
IValueT Sd = encodeSRegister(OpSd, "Sd", Vdivs);
IValueT Sn = encodeSRegister(OpSn, "Sn", Vdivs);
IValueT Sm = encodeSRegister(OpSm, "Sm", Vdivs);
constexpr IValueT VdivsOpcode = B23;
emitVFPsss(Cond, VdivsOpcode, Sd, Sn, Sm);
emitVFPsss(Cond, VdivsOpcode, OpSd, OpSn, OpSm, Vdivs);
}
void AssemblerARM32::vdivd(const Operand *OpDd, const Operand *OpDn,
......@@ -2320,11 +2329,8 @@ void AssemblerARM32::vdivd(const Operand *OpDd, const Operand *OpDn,
// cccc11101D00nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
// and Mmmmm=Rm.
constexpr const char *Vdivd = "vdivd";
IValueT Dd = encodeDRegister(OpDd, "Dd", Vdivd);
IValueT Dn = encodeDRegister(OpDn, "Dn", Vdivd);
IValueT Dm = encodeDRegister(OpDm, "Dm", Vdivd);
constexpr IValueT VdivdOpcode = B23;
emitVFPddd(Cond, VdivdOpcode, Dd, Dn, Dm);
emitVFPddd(Cond, VdivdOpcode, OpDd, OpDn, OpDm, Vdivd);
}
void AssemblerARM32::veord(const Operand *OpDd, const Operand *OpDn,
......@@ -2444,6 +2450,30 @@ void AssemblerARM32::vmovsr(const Operand *OpSn, const Operand *OpRt,
emitInst(Encoding);
}
void AssemblerARM32::vmlad(const Operand *OpDd, const Operand *OpDn,
const Operand *OpDm, CondARM32::Cond Cond) {
// VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
// vmla<c>.f64 <Dd>, <Dn>, <Dm>
//
// cccc11100d00nnnndddd1011n0M0mmmm where cccc=Cond, Ddddd=Dd, Nnnnn=Dn, and
// Mmmmm=Dm
constexpr const char *Vmlad = "vmlad";
constexpr IValueT VmladOpcode = 0;
emitVFPddd(Cond, VmladOpcode, OpDd, OpDn, OpDm, Vmlad);
}
void AssemblerARM32::vmlas(const Operand *OpSd, const Operand *OpSn,
const Operand *OpSm, CondARM32::Cond Cond) {
// VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
// vmla<c>.f32 <Sd>, <Sn>, <Sm>
//
// cccc11100d00nnnndddd1010n0M0mmmm where cccc=Cond, ddddD=Sd, nnnnN=Sn, and
// mmmmM=Sm
constexpr const char *Vmlas = "vmlas";
constexpr IValueT VmlasOpcode = 0;
emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas);
}
void AssemblerARM32::vmrsAPSR_nzcv(CondARM32::Cond Cond) {
// VMRS - ARM section A8.8.348, encoding A1:
// vmrs<c> APSR_nzcv, FPSCR
......@@ -2465,11 +2495,8 @@ void AssemblerARM32::vmuls(const Operand *OpSd, const Operand *OpSn,
// cccc11100D10nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
// and mmmmM=Rm.
constexpr const char *Vmuls = "vmuls";
IValueT Sd = encodeSRegister(OpSd, "Sd", Vmuls);
IValueT Sn = encodeSRegister(OpSn, "Sn", Vmuls);
IValueT Sm = encodeSRegister(OpSm, "Sm", Vmuls);
constexpr IValueT VmulsOpcode = B21;
emitVFPsss(Cond, VmulsOpcode, Sd, Sn, Sm);
emitVFPsss(Cond, VmulsOpcode, OpSd, OpSn, OpSm, Vmuls);
}
void AssemblerARM32::vmuld(const Operand *OpDd, const Operand *OpDn,
......@@ -2480,11 +2507,8 @@ void AssemblerARM32::vmuld(const Operand *OpDd, const Operand *OpDn,
// cccc11100D10nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
// and Mmmmm=Rm.
constexpr const char *Vmuld = "vmuld";
IValueT Dd = encodeDRegister(OpDd, "Dd", Vmuld);
IValueT Dn = encodeDRegister(OpDn, "Dn", Vmuld);
IValueT Dm = encodeDRegister(OpDm, "Dm", Vmuld);
constexpr IValueT VmuldOpcode = B21;
emitVFPddd(Cond, VmuldOpcode, Dd, Dn, Dm);
emitVFPddd(Cond, VmuldOpcode, OpDd, OpDn, OpDm, Vmuld);
}
void AssemblerARM32::vstrd(const Operand *OpDd, const Operand *OpAddress,
......@@ -2538,11 +2562,8 @@ void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn,
// cccc11100D11nnnndddd101sN1M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
// and mmmmM=Rm.
constexpr const char *Vsubs = "vsubs";
IValueT Sd = encodeSRegister(OpSd, "Sd", Vsubs);
IValueT Sn = encodeSRegister(OpSn, "Sn", Vsubs);
IValueT Sm = encodeSRegister(OpSm, "Sm", Vsubs);
constexpr IValueT VsubsOpcode = B21 | B20 | B6;
emitVFPsss(Cond, VsubsOpcode, Sd, Sn, Sm);
emitVFPsss(Cond, VsubsOpcode, OpSd, OpSn, OpSm, Vsubs);
}
void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
......@@ -2553,11 +2574,8 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
// cccc11100D11nnnndddd101sN1M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
// and Mmmmm=Rm.
constexpr const char *Vsubd = "vsubd";
IValueT Dd = encodeDRegister(OpDd, "Dd", Vsubd);
IValueT Dn = encodeDRegister(OpDn, "Dn", Vsubd);
IValueT Dm = encodeDRegister(OpDm, "Dm", Vsubd);
constexpr IValueT VsubdOpcode = B21 | B20 | B6;
emitVFPddd(Cond, VsubdOpcode, Dd, Dn, Dm);
emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd);
}
void AssemblerARM32::emitVStackOp(CondARM32::Cond Cond, IValueT Opcode,
......
......@@ -390,6 +390,12 @@ public:
void vmovsr(const Operand *OpSn, const Operand *OpRt, CondARM32::Cond Cond);
// Emits vmla<c>.f64 <Dd>, <Dn>, <Dm>. The three operands are encoded as D
// registers; Cond is the condition for the instruction.
void vmlad(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond);
// Emits vmla<c>.f32 <Sd>, <Sn>, <Sm>. The three operands are encoded as S
// registers; Cond is the condition for the instruction.
void vmlas(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
// Uses APSR_nzcv as register
void vmrsAPSR_nzcv(CondARM32::Cond Cond);
......@@ -585,12 +591,20 @@ private:
const Operand *OpSrc, const char *MovName);
// Emit VFP instruction with 3 D registers.
// The Operand overload encodes OpDd, OpDn and OpDm as D registers (passing
// InstName to the register encoder) and then calls the IValueT overload,
// which takes the already-encoded register numbers.
void emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, const Operand *OpDd,
const Operand *OpDn, const Operand *OpDm,
const char *InstName);
void emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, IValueT Dn,
IValueT Dm);
// Emit VFP instruction with 3 S registers.
// The IValueT overload takes already-encoded register numbers. The Operand
// overload encodes OpSd, OpSn and OpSm as S registers (passing InstName to
// the register encoder) and then calls the IValueT overload.
void emitVFPsss(CondARM32::Cond Cond, IValueT Opcode, IValueT Sd, IValueT Sn,
IValueT Sm);
void emitVFPsss(CondARM32::Cond Cond, IValueT Opcode, const Operand *OpSd,
const Operand *OpSn, const Operand *OpSm,
const char *InstName);
};
} // end of namespace ARM32
......
......@@ -642,6 +642,27 @@ template <> void InstARM32Veor::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup());
}
// Emits this VMLA through the integrated ARM32 assembler. Destinations of
// type f32 and f64 are encoded directly with vmlas() / vmlad(); any other
// destination type falls back to emitUsingTextFixup().
template <> void InstARM32Vmla::emitIAS(const Cfg *Func) const {
  // Note: Dest == getSrc(0) for four address FP instructions, so only
  // getSrc(1) and getSrc(2) are passed along with the destination.
  assert(getSrcSize() == 3);
  const Variable *Dest = getDest();
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  switch (Dest->getType()) {
  case IceType_f32:
    Asm->vmlas(Dest, getSrc(1), getSrc(2), CondARM32::AL);
    assert(!Asm->needsTextFixup());
    break;
  case IceType_f64:
    Asm->vmlad(Dest, getSrc(1), getSrc(2), CondARM32::AL);
    assert(!Asm->needsTextFixup());
    break;
  default:
    // TODO(kschimpf) Figure out how vector operations apply.
    emitUsingTextFixup(Func);
    break;
  }
}
template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
......
; Show that we can take advantage of the vmla instruction for floating point
; operations during optimization.
; Note that we use -O2 to force the result of the fmul to be (immediately)
; available for the fadd. When using -Om1, the merge of fmul and fadd does not
; happen due to intervening register spill code.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 -reg-use=s20,s21,s22,d20,d21,d22 \
; RUN: | FileCheck %s --check-prefix=DIS
; Single-precision case: multiplies %f1 by 1.5 and adds the product to %f2.
; The checks below expect the pair to be emitted as one vmla.f32 in textual
; assembly, pin the encoded bytes in the disassembly, and require that the
; integrated-assembler output contains no textual vmla.
define internal float @mulAddFloat(float %f1, float %f2) {
; ASM-LABEL: mulAddFloat:
; DIS-LABEL: 00000000 <mulAddFloat>:
%v1 = fmul float %f1, 1.5
%v2 = fadd float %f2, %v1
; ASM: vmla.f32 s21, s20, s22
; DIS: 10: ee4aaa0b
; IASM-NOT: vmla
ret float %v2
}
; Double-precision case: multiplies %f1 by 1.5 and adds the product to %f2.
; The checks below expect the pair to be emitted as one vmla.f64 in textual
; assembly, pin the encoded bytes in the disassembly, and require that the
; integrated-assembler output contains no textual vmla.
define internal double @mulAddDouble(double %f1, double %f2) {
; ASM-LABEL: mulAddDouble:
; DIS-LABEL: 00000020 <mulAddDouble>:
%v1 = fmul double %f1, 1.5
%v2 = fadd double %f2, %v1
; ASM: vmla.f64 d21, d20, d22
; DIS: 2c: ee445ba6
; IASM-NOT: vmla
ret double %v2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment